sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
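

# A minimal usage sketch (assuming only the public `sqlglot.exp` helpers): the
# builders above turn a function's raw argument list into an AST node. For
# example, `build_var_map` pairs up alternating key/value arguments:
#
#     from sqlglot import exp
#
#     node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     assert isinstance(node, exp.VarMap)  # keys = ['a'], values = [1]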


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )
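

# Illustrative example: `build_mod` parenthesizes binary operands so that
# MOD(a + 1, 7) renders as (a + 1) % 7 rather than a + 1 % 7, which would
# bind differently under operator precedence. Roughly:
#
#     import sqlglot
#
#     sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
#     # should yield: 'SELECT (a + 1) % 7'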


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
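
    # Illustrative sketch: subclasses extend FUNCTIONS to register additional
    # builders; "MY_FUNC" below is a hypothetical name, and exp.Anonymous is the
    # generic node sqlglot uses for functions it doesn't model explicitly:
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #         }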

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.LOCK,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
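
    # Illustrative example: the ARROW entry above is what parses higher-order
    # function arguments such as `x -> x + 1` (e.g. DuckDB's LIST_TRANSFORM)
    # into an exp.Lambda whose `expressions` holds the parameters and whose
    # `this` holds the body:
    #
    #     sqlglot.parse_one("SELECT LIST_TRANSFORM(l, x -> x + 1)", read="duckdb")
    #     # the second argument should parse as an exp.Lambda node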

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }
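
    # Illustrative example: with the COLUMN_OPERATORS table above, a Postgres-style
    # expression such as `data -> 'a' ->> 'b'` should parse into nested
    # exp.JSONExtract / exp.JSONExtractScalar nodes, and `x::int` should parse
    # into a cast via the DCOLON entry.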

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }
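
    # Illustrative example: the COLON entry above is what lets a named bind
    # parameter like `SELECT :name` parse into exp.Placeholder(this="name"),
    # provided the token following ":" is in COLON_PLACEHOLDER_TOKENS; otherwise
    # the lambda yields None and the parser backtracks.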

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
"REMOTE": lambda self: self._parse_remote_with_connection(), 1046 "RETURNS": lambda self: self._parse_returns(), 1047 "STRICT": lambda self: self.expression(exp.StrictProperty), 1048 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1049 "ROW": lambda self: self._parse_row(), 1050 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1051 "SAMPLE": lambda self: self.expression( 1052 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1053 ), 1054 "SECURE": lambda self: self.expression(exp.SecureProperty), 1055 "SECURITY": lambda self: self._parse_security(), 1056 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1057 "SETTINGS": lambda self: self._parse_settings_property(), 1058 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1059 "SORTKEY": lambda self: self._parse_sortkey(), 1060 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1061 "STABLE": lambda self: self.expression( 1062 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1063 ), 1064 "STORED": lambda self: self._parse_stored(), 1065 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1066 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1067 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1068 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1069 "TO": lambda self: self._parse_to_table(), 1070 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1071 "TRANSFORM": lambda self: self.expression( 1072 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1073 ), 1074 "TTL": lambda self: self._parse_ttl(), 1075 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1076 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1077 "VOLATILE": lambda self: self._parse_volatile_property(), 1078 "WITH": lambda self: self._parse_with_property(), 1079 } 1080 1081 CONSTRAINT_PARSERS = { 1082 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1083 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1084 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1085 "CHARACTER SET": lambda self: self.expression( 1086 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1087 ), 1088 "CHECK": lambda self: self.expression( 1089 exp.CheckColumnConstraint, 1090 this=self._parse_wrapped(self._parse_assignment), 1091 enforced=self._match_text_seq("ENFORCED"), 1092 ), 1093 "COLLATE": lambda self: self.expression( 1094 exp.CollateColumnConstraint, 1095 this=self._parse_identifier() or self._parse_column(), 1096 ), 1097 "COMMENT": lambda self: self.expression( 1098 exp.CommentColumnConstraint, this=self._parse_string() 1099 ), 1100 "COMPRESS": lambda self: self._parse_compress(), 1101 "CLUSTERED": lambda self: self.expression( 1102 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1103 ), 1104 "NONCLUSTERED": lambda self: self.expression( 1105 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1106 ), 1107 "DEFAULT": lambda self: self.expression( 1108 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1109 ), 1110 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1111 "EPHEMERAL": lambda self: self.expression( 1112 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1113 ), 1114 
"EXCLUDE": lambda self: self.expression( 1115 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1116 ), 1117 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1118 "FORMAT": lambda self: self.expression( 1119 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1120 ), 1121 "GENERATED": lambda self: self._parse_generated_as_identity(), 1122 "IDENTITY": lambda self: self._parse_auto_increment(), 1123 "INLINE": lambda self: self._parse_inline(), 1124 "LIKE": lambda self: self._parse_create_like(), 1125 "NOT": lambda self: self._parse_not_constraint(), 1126 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1127 "ON": lambda self: ( 1128 self._match(TokenType.UPDATE) 1129 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1130 ) 1131 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1132 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1133 "PERIOD": lambda self: self._parse_period_for_system_time(), 1134 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1135 "REFERENCES": lambda self: self._parse_references(match=False), 1136 "TITLE": lambda self: self.expression( 1137 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1138 ), 1139 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1140 "UNIQUE": lambda self: self._parse_unique(), 1141 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1142 "WATERMARK": lambda self: self.expression( 1143 exp.WatermarkColumnConstraint, 1144 this=self._match(TokenType.FOR) and self._parse_column(), 1145 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1146 ), 1147 "WITH": lambda self: self.expression( 1148 exp.Properties, expressions=self._parse_wrapped_properties() 1149 ), 1150 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1151 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1152 } 1153 1154 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1155 if not self._match(TokenType.L_PAREN, advance=False): 1156 # Partitioning by bucket or truncate follows the syntax: 1157 # PARTITION BY (BUCKET(..) 

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If there's no parenthesis after the keyword, we should instead parse it as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized into the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1248 "GAP_FILL": lambda self: self._parse_gap_fill(), 1249 "JSON_OBJECT": lambda self: self._parse_json_object(), 1250 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1251 "JSON_TABLE": lambda self: self._parse_json_table(), 1252 "MATCH": lambda self: self._parse_match_against(), 1253 "NORMALIZE": lambda self: self._parse_normalize(), 1254 "OPENJSON": lambda self: self._parse_open_json(), 1255 "OVERLAY": lambda self: self._parse_overlay(), 1256 "POSITION": lambda self: self._parse_position(), 1257 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1258 "STRING_AGG": lambda self: self._parse_string_agg(), 1259 "SUBSTRING": lambda self: self._parse_substring(), 1260 "TRIM": lambda self: self._parse_trim(), 1261 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1262 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1263 "XMLELEMENT": lambda self: self.expression( 1264 exp.XMLElement, 1265 this=self._match_text_seq("NAME") and self._parse_id_var(), 1266 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1267 ), 1268 "XMLTABLE": lambda self: self._parse_xml_table(), 1269 } 1270 1271 QUERY_MODIFIER_PARSERS = { 1272 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1273 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1274 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1275 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1276 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1277 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1278 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1279 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1280 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1281 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1282 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1283 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1284 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1285 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1286 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1287 TokenType.CLUSTER_BY: lambda self: ( 1288 "cluster", 1289 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1290 ), 1291 TokenType.DISTRIBUTE_BY: lambda self: ( 1292 "distribute", 1293 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1294 ), 1295 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1296 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1297 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1298 } 1299 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1300 1301 SET_PARSERS = { 1302 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1303 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1304 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1305 "TRANSACTION": lambda self: self._parse_set_transaction(), 1306 } 1307 1308 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1309 1310 TYPE_LITERAL_PARSERS = { 1311 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1312 } 1313 1314 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with the literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False
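
    # Illustrative sketch: dialect-specific parsers usually customize behavior by
    # flipping these class-level flags rather than overriding methods, e.g. (a
    # hypothetical dialect):
    #
    #     class MyDialectParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True
    #         STRING_ALIASES = True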

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
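
    # A minimal usage sketch (the top-level sqlglot.parse / sqlglot.parse_one
    # helpers wrap this same flow and are the usual entry points):
    #
    #     import sqlglot
    #     from sqlglot.parser import Parser
    #
    #     sql = "SELECT a FROM t"
    #     tokens = sqlglot.tokenize(sql)
    #     expressions = Parser().parse(tokens, sql)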
1623 sql: The original SQL string, used to produce helpful debug messages. 1624 1625 Returns: 1626 The list of produced syntax trees. 1627 """ 1628 return self._parse( 1629 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1630 ) 1631 1632 def parse_into( 1633 self, 1634 expression_types: exp.IntoType, 1635 raw_tokens: t.List[Token], 1636 sql: t.Optional[str] = None, 1637 ) -> t.List[t.Optional[exp.Expression]]: 1638 """ 1639 Parses a list of tokens into a given Expression type. If a collection of Expression 1640 types is given instead, this method will try to parse the token list into each one 1641 of them, stopping at the first for which the parsing succeeds. 1642 1643 Args: 1644 expression_types: The expression type(s) to try and parse the token list into. 1645 raw_tokens: The list of tokens. 1646 sql: The original SQL string, used to produce helpful debug messages. 1647 1648 Returns: 1649 The target Expression. 1650 """ 1651 errors = [] 1652 for expression_type in ensure_list(expression_types): 1653 parser = self.EXPRESSION_PARSERS.get(expression_type) 1654 if not parser: 1655 raise TypeError(f"No parser registered for {expression_type}") 1656 1657 try: 1658 return self._parse(parser, raw_tokens, sql) 1659 except ParseError as e: 1660 e.errors[0]["into_expression"] = expression_type 1661 errors.append(e) 1662 1663 raise ParseError( 1664 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1665 errors=merge_errors(errors), 1666 ) from errors[-1] 1667 1668 def _parse( 1669 self, 1670 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1671 raw_tokens: t.List[Token], 1672 sql: t.Optional[str] = None, 1673 ) -> t.List[t.Optional[exp.Expression]]: 1674 self.reset() 1675 self.sql = sql or "" 1676 1677 total = len(raw_tokens) 1678 chunks: t.List[t.List[Token]] = [[]] 1679 1680 for i, token in enumerate(raw_tokens): 1681 if token.token_type == TokenType.SEMICOLON: 1682 if token.comments: 1683 chunks.append([token]) 1684 1685 if i < total - 1: 1686 chunks.append([]) 1687 else: 1688 chunks[-1].append(token) 1689 1690 expressions = [] 1691 1692 for tokens in chunks: 1693 self._index = -1 1694 self._tokens = tokens 1695 self._advance() 1696 1697 expressions.append(parse_method(self)) 1698 1699 if self._index < len(self._tokens): 1700 self.raise_error("Invalid expression / Unexpected token") 1701 1702 self.check_errors() 1703 1704 return expressions 1705 1706 def check_errors(self) -> None: 1707 """Logs or raises any found errors, depending on the chosen error level setting.""" 1708 if self.error_level == ErrorLevel.WARN: 1709 for error in self.errors: 1710 logger.error(str(error)) 1711 elif self.error_level == ErrorLevel.RAISE and self.errors: 1712 raise ParseError( 1713 concat_messages(self.errors, self.max_errors), 1714 errors=merge_errors(self.errors), 1715 ) 1716 1717 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1718 """ 1719 Appends an error to the list of recorded errors or raises it, depending on the chosen 1720 error level setting. 1721 """ 1722 token = token or self._curr or self._prev or Token.string("") 1723 start = token.start 1724 end = token.end + 1 1725 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1726 highlight = self.sql[start:end] 1727 end_context = self.sql[end : end + self.error_message_context] 1728 1729 error = ParseError.new( 1730 f"{message}.
Line {token.line}, Col: {token.col}.\n" 1731 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1732 description=message, 1733 line=token.line, 1734 col=token.col, 1735 start_context=start_context, 1736 highlight=highlight, 1737 end_context=end_context, 1738 ) 1739 1740 if self.error_level == ErrorLevel.IMMEDIATE: 1741 raise error 1742 1743 self.errors.append(error) 1744 1745 def expression( 1746 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1747 ) -> E: 1748 """ 1749 Creates a new, validated Expression. 1750 1751 Args: 1752 exp_class: The expression class to instantiate. 1753 comments: An optional list of comments to attach to the expression. 1754 kwargs: The arguments to set for the expression along with their respective values. 1755 1756 Returns: 1757 The target expression. 1758 """ 1759 instance = exp_class(**kwargs) 1760 instance.add_comments(comments) if comments else self._add_comments(instance) 1761 return self.validate_expression(instance) 1762 1763 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1764 if expression and self._prev_comments: 1765 expression.add_comments(self._prev_comments) 1766 self._prev_comments = None 1767 1768 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1769 """ 1770 Validates an Expression, making sure that all its mandatory arguments are set. 1771 1772 Args: 1773 expression: The expression to validate. 1774 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1775 1776 Returns: 1777 The validated expression. 1778 """ 1779 if self.error_level != ErrorLevel.IGNORE: 1780 for error_message in expression.error_messages(args): 1781 self.raise_error(error_message) 1782 1783 return expression 1784 1785 def _find_sql(self, start: Token, end: Token) -> str: 1786 return self.sql[start.start : end.end + 1] 1787 1788 def _is_connected(self) -> bool: 1789 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1790 1791 def _advance(self, times: int = 1) -> None: 1792 self._index += times 1793 self._curr = seq_get(self._tokens, self._index) 1794 self._next = seq_get(self._tokens, self._index + 1) 1795 1796 if self._index > 0: 1797 self._prev = self._tokens[self._index - 1] 1798 self._prev_comments = self._prev.comments 1799 else: 1800 self._prev = None 1801 self._prev_comments = None 1802 1803 def _retreat(self, index: int) -> None: 1804 if index != self._index: 1805 self._advance(index - self._index) 1806 1807 def _warn_unsupported(self) -> None: 1808 if len(self._tokens) <= 1: 1809 return 1810 1811 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1812 # interested in emitting a warning for the one being currently processed. 1813 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1814 1815 logger.warning( 1816 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1817 ) 1818 1819 def _parse_command(self) -> exp.Command: 1820 self._warn_unsupported() 1821 return self.expression( 1822 exp.Command, 1823 comments=self._prev_comments, 1824 this=self._prev.text.upper(), 1825 expression=self._parse_string(), 1826 ) 1827 1828 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1829 """ 1830 Attempts to backtrack if a parse function that contains a try/except internally raises an error.
1831 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1832 solve this by setting & resetting the parser state accordingly. 1833 """ 1834 index = self._index 1835 error_level = self.error_level 1836 1837 self.error_level = ErrorLevel.IMMEDIATE 1838 try: 1839 this = parse_method() 1840 except ParseError: 1841 this = None 1842 finally: 1843 if not this or retreat: 1844 self._retreat(index) 1845 self.error_level = error_level 1846 1847 return this 1848 1849 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1850 start = self._prev 1851 exists = self._parse_exists() if allow_exists else None 1852 1853 self._match(TokenType.ON) 1854 1855 materialized = self._match_text_seq("MATERIALIZED") 1856 kind = self._match_set(self.CREATABLES) and self._prev 1857 if not kind: 1858 return self._parse_as_command(start) 1859 1860 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1861 this = self._parse_user_defined_function(kind=kind.token_type) 1862 elif kind.token_type == TokenType.TABLE: 1863 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1864 elif kind.token_type == TokenType.COLUMN: 1865 this = self._parse_column() 1866 else: 1867 this = self._parse_id_var() 1868 1869 self._match(TokenType.IS) 1870 1871 return self.expression( 1872 exp.Comment, 1873 this=this, 1874 kind=kind.text, 1875 expression=self._parse_string(), 1876 exists=exists, 1877 materialized=materialized, 1878 ) 1879 1880 def _parse_to_table( 1881 self, 1882 ) -> exp.ToTableProperty: 1883 table = self._parse_table_parts(schema=True) 1884 return self.expression(exp.ToTableProperty, this=table) 1885 1886 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1887 def _parse_ttl(self) -> exp.Expression: 1888 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1889 this = self._parse_bitwise() 1890 1891 if self._match_text_seq("DELETE"): 1892 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1893 if self._match_text_seq("RECOMPRESS"): 1894 return self.expression( 1895 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1896 ) 1897 if self._match_text_seq("TO", "DISK"): 1898 return self.expression( 1899 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1900 ) 1901 if self._match_text_seq("TO", "VOLUME"): 1902 return self.expression( 1903 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1904 ) 1905 1906 return this 1907 1908 expressions = self._parse_csv(_parse_ttl_action) 1909 where = self._parse_where() 1910 group = self._parse_group() 1911 1912 aggregates = None 1913 if group and self._match(TokenType.SET): 1914 aggregates = self._parse_csv(self._parse_set_item) 1915 1916 return self.expression( 1917 exp.MergeTreeTTL, 1918 expressions=expressions, 1919 where=where, 1920 group=group, 1921 aggregates=aggregates, 1922 ) 1923 1924 def _parse_statement(self) -> t.Optional[exp.Expression]: 1925 if self._curr is None: 1926 return None 1927 1928 if self._match_set(self.STATEMENT_PARSERS): 1929 comments = self._prev_comments 1930 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1931 stmt.add_comments(comments, prepend=True) 1932 return stmt 1933 1934 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1935 return self._parse_command() 1936 1937 expression = self._parse_expression() 1938 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1939 return
self._parse_query_modifiers(expression) 1940 1941 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1942 start = self._prev 1943 temporary = self._match(TokenType.TEMPORARY) 1944 materialized = self._match_text_seq("MATERIALIZED") 1945 1946 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1947 if not kind: 1948 return self._parse_as_command(start) 1949 1950 concurrently = self._match_text_seq("CONCURRENTLY") 1951 if_exists = exists or self._parse_exists() 1952 1953 if kind == "COLUMN": 1954 this = self._parse_column() 1955 else: 1956 this = self._parse_table_parts( 1957 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1958 ) 1959 1960 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1961 1962 if self._match(TokenType.L_PAREN, advance=False): 1963 expressions = self._parse_wrapped_csv(self._parse_types) 1964 else: 1965 expressions = None 1966 1967 return self.expression( 1968 exp.Drop, 1969 exists=if_exists, 1970 this=this, 1971 expressions=expressions, 1972 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1973 temporary=temporary, 1974 materialized=materialized, 1975 cascade=self._match_text_seq("CASCADE"), 1976 constraints=self._match_text_seq("CONSTRAINTS"), 1977 purge=self._match_text_seq("PURGE"), 1978 cluster=cluster, 1979 concurrently=concurrently, 1980 ) 1981 1982 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1983 return ( 1984 self._match_text_seq("IF") 1985 and (not not_ or self._match(TokenType.NOT)) 1986 and self._match(TokenType.EXISTS) 1987 ) 1988 1989 def _parse_create(self) -> exp.Create | exp.Command: 1990 # Note: this can't be None because we've matched a statement parser 1991 start = self._prev 1992 1993 replace = ( 1994 start.token_type == TokenType.REPLACE 1995 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1996 or self._match_pair(TokenType.OR, TokenType.ALTER) 1997 ) 1998 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1999 2000 unique = self._match(TokenType.UNIQUE) 2001 2002 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2003 clustered = True 2004 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2005 "COLUMNSTORE" 2006 ): 2007 clustered = False 2008 else: 2009 clustered = None 2010 2011 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2012 self._advance() 2013 2014 properties = None 2015 create_token = self._match_set(self.CREATABLES) and self._prev 2016 2017 if not create_token: 2018 # exp.Properties.Location.POST_CREATE 2019 properties = self._parse_properties() 2020 create_token = self._match_set(self.CREATABLES) and self._prev 2021 2022 if not properties or not create_token: 2023 return self._parse_as_command(start) 2024 2025 concurrently = self._match_text_seq("CONCURRENTLY") 2026 exists = self._parse_exists(not_=True) 2027 this = None 2028 expression: t.Optional[exp.Expression] = None 2029 indexes = None 2030 no_schema_binding = None 2031 begin = None 2032 end = None 2033 clone = None 2034 2035 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2036 nonlocal properties 2037 if properties and temp_props: 2038 properties.expressions.extend(temp_props.expressions) 2039 elif temp_props: 2040 properties = temp_props 2041 2042 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2043 this = self._parse_user_defined_function(kind=create_token.token_type) 2044 2045 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2046 extend_props(self._parse_properties()) 2047 2048 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2049 extend_props(self._parse_properties()) 2050 2051 if not expression: 2052 if self._match(TokenType.COMMAND): 2053 expression = self._parse_as_command(self._prev) 2054 else: 2055 begin = self._match(TokenType.BEGIN) 2056 return_ = self._match_text_seq("RETURN") 2057 2058 if self._match(TokenType.STRING, advance=False): 2059 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2060 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2061 expression = self._parse_string() 2062 extend_props(self._parse_properties()) 2063 else: 2064 expression = self._parse_user_defined_function_expression() 2065 2066 end = self._match_text_seq("END") 2067 2068 if return_: 2069 expression = self.expression(exp.Return, this=expression) 2070 elif create_token.token_type == TokenType.INDEX: 2071 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2072 if not self._match(TokenType.ON): 2073 index = self._parse_id_var() 2074 anonymous = False 2075 else: 2076 index = None 2077 anonymous = True 2078 2079 this = self._parse_index(index=index, anonymous=anonymous) 2080 elif create_token.token_type in self.DB_CREATABLES: 2081 table_parts = self._parse_table_parts( 2082 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2083 ) 2084 2085 # exp.Properties.Location.POST_NAME 2086 self._match(TokenType.COMMA) 2087 extend_props(self._parse_properties(before=True)) 2088 2089 this = self._parse_schema(this=table_parts) 2090 2091 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2092 extend_props(self._parse_properties()) 2093 2094 has_alias = self._match(TokenType.ALIAS) 2095 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2096 # exp.Properties.Location.POST_ALIAS 2097 extend_props(self._parse_properties()) 2098 2099 if create_token.token_type == TokenType.SEQUENCE: 2100 expression = self._parse_types() 2101 props = self._parse_properties() 2102 if props: 2103 sequence_props = exp.SequenceProperties() 2104 options = [] 2105 for prop in props: 2106 if isinstance(prop, exp.SequenceProperties): 2107 for arg, value in prop.args.items(): 2108 if arg == "options": 2109 options.extend(value) 2110 else: 2111 sequence_props.set(arg, value) 2112 prop.pop() 2113 2114 if options: 2115 sequence_props.set("options", options) 2116 2117 props.append("expressions", sequence_props) 2118 extend_props(props) 2119 else: 2120 expression = self._parse_ddl_select() 2121 2122 # Some dialects also support using a table as an alias instead of a SELECT. 2123 # Here we fall back to this as an alternative.
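# A minimal doctest-style sketch of the CTAS path above, assuming the public
# sqlglot.parse_one entry point and the default dialect (output abbreviated to
# the fields set by _parse_create):
#
#     >>> import sqlglot
#     >>> create = sqlglot.parse_one("CREATE TABLE t AS SELECT 1 AS c")
#     >>> type(create).__name__, create.args["kind"]
#     ('Create', 'TABLE')
#
# When a dialect instead allows a bare table after AS (no SELECT), the
# _try_parse(self._parse_table_parts) fallback below recovers it.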
2124 if not expression and has_alias: 2125 expression = self._try_parse(self._parse_table_parts) 2126 2127 if create_token.token_type == TokenType.TABLE: 2128 # exp.Properties.Location.POST_EXPRESSION 2129 extend_props(self._parse_properties()) 2130 2131 indexes = [] 2132 while True: 2133 index = self._parse_index() 2134 2135 # exp.Properties.Location.POST_INDEX 2136 extend_props(self._parse_properties()) 2137 if not index: 2138 break 2139 else: 2140 self._match(TokenType.COMMA) 2141 indexes.append(index) 2142 elif create_token.token_type == TokenType.VIEW: 2143 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2144 no_schema_binding = True 2145 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2146 extend_props(self._parse_properties()) 2147 2148 shallow = self._match_text_seq("SHALLOW") 2149 2150 if self._match_texts(self.CLONE_KEYWORDS): 2151 copy = self._prev.text.lower() == "copy" 2152 clone = self.expression( 2153 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2154 ) 2155 2156 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2157 return self._parse_as_command(start) 2158 2159 create_kind_text = create_token.text.upper() 2160 return self.expression( 2161 exp.Create, 2162 this=this, 2163 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2164 replace=replace, 2165 refresh=refresh, 2166 unique=unique, 2167 expression=expression, 2168 exists=exists, 2169 properties=properties, 2170 indexes=indexes, 2171 no_schema_binding=no_schema_binding, 2172 begin=begin, 2173 end=end, 2174 clone=clone, 2175 concurrently=concurrently, 2176 clustered=clustered, 2177 ) 2178 2179 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2180 seq = exp.SequenceProperties() 2181 2182 options = [] 2183 index = self._index 2184 2185 while self._curr: 2186 self._match(TokenType.COMMA) 2187 if self._match_text_seq("INCREMENT"): 2188 self._match_text_seq("BY") 2189 self._match_text_seq("=") 2190 seq.set("increment", self._parse_term()) 2191 elif self._match_text_seq("MINVALUE"): 2192 seq.set("minvalue", self._parse_term()) 2193 elif self._match_text_seq("MAXVALUE"): 2194 seq.set("maxvalue", self._parse_term()) 2195 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2196 self._match_text_seq("=") 2197 seq.set("start", self._parse_term()) 2198 elif self._match_text_seq("CACHE"): 2199 # T-SQL allows empty CACHE which is initialized dynamically 2200 seq.set("cache", self._parse_number() or True) 2201 elif self._match_text_seq("OWNED", "BY"): 2202 # "OWNED BY NONE" is the default 2203 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2204 else: 2205 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2206 if opt: 2207 options.append(opt) 2208 else: 2209 break 2210 2211 seq.set("options", options if options else None) 2212 return None if self._index == index else seq 2213 2214 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2215 # only used for teradata currently 2216 self._match(TokenType.COMMA) 2217 2218 kwargs = { 2219 "no": self._match_text_seq("NO"), 2220 "dual": self._match_text_seq("DUAL"), 2221 "before": self._match_text_seq("BEFORE"), 2222 "default": self._match_text_seq("DEFAULT"), 2223 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2224 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2225 "after": self._match_text_seq("AFTER"), 2226 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2227 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2228 } 2229 2230 if self._match_texts(self.PROPERTY_PARSERS): 2231 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2232 try: 2233 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2234 except TypeError: 2235 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2236 2237 return None 2238 2239 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2240 return self._parse_wrapped_csv(self._parse_property) 2241 2242 def _parse_property(self) -> t.Optional[exp.Expression]: 2243 if self._match_texts(self.PROPERTY_PARSERS): 2244 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2245 2246 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2247 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2248 2249 if self._match_text_seq("COMPOUND", "SORTKEY"): 2250 return self._parse_sortkey(compound=True) 2251 2252 if self._match_text_seq("SQL", "SECURITY"): 2253 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2254 2255 index = self._index 2256 2257 seq_props = self._parse_sequence_properties() 2258 if seq_props: 2259 return seq_props 2260 2261 self._retreat(index) 2262 key = self._parse_column() 2263 2264 if not self._match(TokenType.EQ): 2265 self._retreat(index) 2266 return None 2267 2268 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2269 if isinstance(key, exp.Column): 2270 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2271 2272 value = self._parse_bitwise() or self._parse_var(any_token=True) 2273 2274 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2275 if isinstance(value, exp.Column): 2276 value = exp.var(value.name) 2277 2278 return self.expression(exp.Property, this=key, value=value) 2279 2280 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2281 if self._match_text_seq("BY"): 2282 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2283 2284 self._match(TokenType.ALIAS) 2285 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2286 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2287 2288 return self.expression( 2289 exp.FileFormatProperty, 2290 this=( 2291 self.expression( 2292 exp.InputOutputFormat, 2293 input_format=input_format, 2294 output_format=output_format, 2295 ) 2296 if input_format or output_format 2297 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2298 ), 2299 hive_format=True, 2300 ) 2301 2302 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2303 field = self._parse_field() 2304 if isinstance(field, exp.Identifier) and not field.quoted: 2305 field = exp.var(field) 2306 2307 return field 2308 2309 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2310 self._match(TokenType.EQ) 2311 self._match(TokenType.ALIAS) 2312 2313 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2314 2315 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2316 properties = [] 2317 while True: 2318 if before: 2319 prop = self._parse_property_before() 2320 else: 2321 prop = self._parse_property() 2322 if not prop: 2323 break 2324 for p in ensure_list(prop): 2325 properties.append(p) 
2326 2327 if properties: 2328 return self.expression(exp.Properties, expressions=properties) 2329 2330 return None 2331 2332 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2333 return self.expression( 2334 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2335 ) 2336 2337 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2338 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2339 security_specifier = self._prev.text.upper() 2340 return self.expression(exp.SecurityProperty, this=security_specifier) 2341 return None 2342 2343 def _parse_settings_property(self) -> exp.SettingsProperty: 2344 return self.expression( 2345 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2346 ) 2347 2348 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2349 if self._index >= 2: 2350 pre_volatile_token = self._tokens[self._index - 2] 2351 else: 2352 pre_volatile_token = None 2353 2354 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2355 return exp.VolatileProperty() 2356 2357 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2358 2359 def _parse_retention_period(self) -> exp.Var: 2360 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2361 number = self._parse_number() 2362 number_str = f"{number} " if number else "" 2363 unit = self._parse_var(any_token=True) 2364 return exp.var(f"{number_str}{unit}") 2365 2366 def _parse_system_versioning_property( 2367 self, with_: bool = False 2368 ) -> exp.WithSystemVersioningProperty: 2369 self._match(TokenType.EQ) 2370 prop = self.expression( 2371 exp.WithSystemVersioningProperty, 2372 **{ # type: ignore 2373 "on": True, 2374 "with": with_, 2375 }, 2376 ) 2377 2378 if self._match_text_seq("OFF"): 2379 prop.set("on", False) 2380 return prop 2381 2382 self._match(TokenType.ON) 2383 if self._match(TokenType.L_PAREN): 2384 while self._curr and not self._match(TokenType.R_PAREN): 2385 if self._match_text_seq("HISTORY_TABLE", "="): 2386 prop.set("this", self._parse_table_parts()) 2387 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2388 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2389 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2390 prop.set("retention_period", self._parse_retention_period()) 2391 2392 self._match(TokenType.COMMA) 2393 2394 return prop 2395 2396 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2397 self._match(TokenType.EQ) 2398 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2399 prop = self.expression(exp.DataDeletionProperty, on=on) 2400 2401 if self._match(TokenType.L_PAREN): 2402 while self._curr and not self._match(TokenType.R_PAREN): 2403 if self._match_text_seq("FILTER_COLUMN", "="): 2404 prop.set("filter_column", self._parse_column()) 2405 elif self._match_text_seq("RETENTION_PERIOD", "="): 2406 prop.set("retention_period", self._parse_retention_period()) 2407 2408 self._match(TokenType.COMMA) 2409 2410 return prop 2411 2412 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2413 kind = "HASH" 2414 expressions: t.Optional[t.List[exp.Expression]] = None 2415 if self._match_text_seq("BY", "HASH"): 2416 expressions = self._parse_wrapped_csv(self._parse_id_var) 2417 elif self._match_text_seq("BY", "RANDOM"): 2418 kind = "RANDOM" 2419 2420 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2421 
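# For reference, the clause shape this method accepts, assuming a dialect such
# as Doris that routes DISTRIBUTED BY here:
#
#     CREATE TABLE t (k INT) DISTRIBUTED BY HASH (k) BUCKETS 10
#
# parses into an exp.DistributedByProperty with kind="HASH", the hash columns in
# "expressions", and buckets=10; with BUCKETS absent or set to AUTO, the
# "buckets" assignment below leaves the value as None.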
buckets: t.Optional[exp.Expression] = None 2422 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2423 buckets = self._parse_number() 2424 2425 return self.expression( 2426 exp.DistributedByProperty, 2427 expressions=expressions, 2428 kind=kind, 2429 buckets=buckets, 2430 order=self._parse_order(), 2431 ) 2432 2433 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2434 self._match_text_seq("KEY") 2435 expressions = self._parse_wrapped_id_vars() 2436 return self.expression(expr_type, expressions=expressions) 2437 2438 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2439 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2440 prop = self._parse_system_versioning_property(with_=True) 2441 self._match_r_paren() 2442 return prop 2443 2444 if self._match(TokenType.L_PAREN, advance=False): 2445 return self._parse_wrapped_properties() 2446 2447 if self._match_text_seq("JOURNAL"): 2448 return self._parse_withjournaltable() 2449 2450 if self._match_texts(self.VIEW_ATTRIBUTES): 2451 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2452 2453 if self._match_text_seq("DATA"): 2454 return self._parse_withdata(no=False) 2455 elif self._match_text_seq("NO", "DATA"): 2456 return self._parse_withdata(no=True) 2457 2458 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2459 return self._parse_serde_properties(with_=True) 2460 2461 if self._match(TokenType.SCHEMA): 2462 return self.expression( 2463 exp.WithSchemaBindingProperty, 2464 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2465 ) 2466 2467 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2468 return self.expression( 2469 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2470 ) 2471 2472 if not self._next: 2473 return None 2474 2475 return self._parse_withisolatedloading() 2476 2477 def _parse_procedure_option(self) -> exp.Expression | None: 2478 if self._match_text_seq("EXECUTE", "AS"): 2479 return self.expression( 2480 exp.ExecuteAsProperty, 2481 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2482 or self._parse_string(), 2483 ) 2484 2485 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2486 2487 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2488 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2489 self._match(TokenType.EQ) 2490 2491 user = self._parse_id_var() 2492 self._match(TokenType.PARAMETER) 2493 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2494 2495 if not user or not host: 2496 return None 2497 2498 return exp.DefinerProperty(this=f"{user}@{host}") 2499 2500 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2501 self._match(TokenType.TABLE) 2502 self._match(TokenType.EQ) 2503 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2504 2505 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2506 return self.expression(exp.LogProperty, no=no) 2507 2508 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2509 return self.expression(exp.JournalProperty, **kwargs) 2510 2511 def _parse_checksum(self) -> exp.ChecksumProperty: 2512 self._match(TokenType.EQ) 2513 2514 on = None 2515 if self._match(TokenType.ON): 2516 on = True 2517 elif self._match_text_seq("OFF"): 2518 on = False 2519 2520 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2521 2522 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2523 return self.expression( 2524 exp.Cluster, 2525 expressions=( 2526 self._parse_wrapped_csv(self._parse_ordered) 2527 if wrapped 2528 else self._parse_csv(self._parse_ordered) 2529 ), 2530 ) 2531 2532 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2533 self._match_text_seq("BY") 2534 2535 self._match_l_paren() 2536 expressions = self._parse_csv(self._parse_column) 2537 self._match_r_paren() 2538 2539 if self._match_text_seq("SORTED", "BY"): 2540 self._match_l_paren() 2541 sorted_by = self._parse_csv(self._parse_ordered) 2542 self._match_r_paren() 2543 else: 2544 sorted_by = None 2545 2546 self._match(TokenType.INTO) 2547 buckets = self._parse_number() 2548 self._match_text_seq("BUCKETS") 2549 2550 return self.expression( 2551 exp.ClusteredByProperty, 2552 expressions=expressions, 2553 sorted_by=sorted_by, 2554 buckets=buckets, 2555 ) 2556 2557 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2558 if not self._match_text_seq("GRANTS"): 2559 self._retreat(self._index - 1) 2560 return None 2561 2562 return self.expression(exp.CopyGrantsProperty) 2563 2564 def _parse_freespace(self) -> exp.FreespaceProperty: 2565 self._match(TokenType.EQ) 2566 return self.expression( 2567 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2568 ) 2569 2570 def _parse_mergeblockratio( 2571 self, no: bool = False, default: bool = False 2572 ) -> exp.MergeBlockRatioProperty: 2573 if self._match(TokenType.EQ): 2574 return self.expression( 2575 exp.MergeBlockRatioProperty, 2576 this=self._parse_number(), 2577 percent=self._match(TokenType.PERCENT), 2578 ) 2579 2580 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2581 2582 def _parse_datablocksize( 2583 self, 2584 default: t.Optional[bool] = None, 2585 minimum: t.Optional[bool] = None, 2586 maximum: t.Optional[bool] = None, 2587 ) -> exp.DataBlocksizeProperty: 2588 self._match(TokenType.EQ) 2589 size = self._parse_number() 2590 2591 units = None 2592 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2593 units = self._prev.text 2594 2595 return self.expression( 2596 exp.DataBlocksizeProperty, 2597 size=size, 2598 units=units, 2599 default=default, 2600 minimum=minimum, 2601 maximum=maximum, 2602 ) 2603 2604 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2605 self._match(TokenType.EQ) 2606 always = self._match_text_seq("ALWAYS") 2607 manual = self._match_text_seq("MANUAL") 2608 never = self._match_text_seq("NEVER") 2609 default = self._match_text_seq("DEFAULT") 2610 2611 autotemp = None 2612 if self._match_text_seq("AUTOTEMP"): 2613 autotemp = self._parse_schema() 2614 2615 return self.expression( 2616 exp.BlockCompressionProperty, 2617 always=always, 2618 manual=manual, 2619 never=never, 2620 default=default, 2621 autotemp=autotemp, 2622 ) 2623 2624 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2625 index = self._index 2626 no = self._match_text_seq("NO") 2627 concurrent = self._match_text_seq("CONCURRENT") 2628 2629 if not self._match_text_seq("ISOLATED", "LOADING"): 2630 self._retreat(index) 2631 return None 2632 2633 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2634 return self.expression( 2635 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2636 ) 2637 2638 def _parse_locking(self) -> exp.LockingProperty: 2639 if self._match(TokenType.TABLE): 2640 kind = "TABLE" 2641 elif 
self._match(TokenType.VIEW): 2642 kind = "VIEW" 2643 elif self._match(TokenType.ROW): 2644 kind = "ROW" 2645 elif self._match_text_seq("DATABASE"): 2646 kind = "DATABASE" 2647 else: 2648 kind = None 2649 2650 if kind in ("DATABASE", "TABLE", "VIEW"): 2651 this = self._parse_table_parts() 2652 else: 2653 this = None 2654 2655 if self._match(TokenType.FOR): 2656 for_or_in = "FOR" 2657 elif self._match(TokenType.IN): 2658 for_or_in = "IN" 2659 else: 2660 for_or_in = None 2661 2662 if self._match_text_seq("ACCESS"): 2663 lock_type = "ACCESS" 2664 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2665 lock_type = "EXCLUSIVE" 2666 elif self._match_text_seq("SHARE"): 2667 lock_type = "SHARE" 2668 elif self._match_text_seq("READ"): 2669 lock_type = "READ" 2670 elif self._match_text_seq("WRITE"): 2671 lock_type = "WRITE" 2672 elif self._match_text_seq("CHECKSUM"): 2673 lock_type = "CHECKSUM" 2674 else: 2675 lock_type = None 2676 2677 override = self._match_text_seq("OVERRIDE") 2678 2679 return self.expression( 2680 exp.LockingProperty, 2681 this=this, 2682 kind=kind, 2683 for_or_in=for_or_in, 2684 lock_type=lock_type, 2685 override=override, 2686 ) 2687 2688 def _parse_partition_by(self) -> t.List[exp.Expression]: 2689 if self._match(TokenType.PARTITION_BY): 2690 return self._parse_csv(self._parse_assignment) 2691 return [] 2692 2693 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2694 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2695 if self._match_text_seq("MINVALUE"): 2696 return exp.var("MINVALUE") 2697 if self._match_text_seq("MAXVALUE"): 2698 return exp.var("MAXVALUE") 2699 return self._parse_bitwise() 2700 2701 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2702 expression = None 2703 from_expressions = None 2704 to_expressions = None 2705 2706 if self._match(TokenType.IN): 2707 this = self._parse_wrapped_csv(self._parse_bitwise) 2708 elif self._match(TokenType.FROM): 2709 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2710 self._match_text_seq("TO") 2711 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2712 elif self._match_text_seq("WITH", "(", "MODULUS"): 2713 this = self._parse_number() 2714 self._match_text_seq(",", "REMAINDER") 2715 expression = self._parse_number() 2716 self._match_r_paren() 2717 else: 2718 self.raise_error("Failed to parse partition bound spec.") 2719 2720 return self.expression( 2721 exp.PartitionBoundSpec, 2722 this=this, 2723 expression=expression, 2724 from_expressions=from_expressions, 2725 to_expressions=to_expressions, 2726 ) 2727 2728 # https://www.postgresql.org/docs/current/sql-createtable.html 2729 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2730 if not self._match_text_seq("OF"): 2731 self._retreat(self._index - 1) 2732 return None 2733 2734 this = self._parse_table(schema=True) 2735 2736 if self._match(TokenType.DEFAULT): 2737 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2738 elif self._match_text_seq("FOR", "VALUES"): 2739 expression = self._parse_partition_bound_spec() 2740 else: 2741 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2742 2743 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2744 2745 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2746 self._match(TokenType.EQ) 2747 return self.expression( 2748 exp.PartitionedByProperty, 2749 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2750 ) 2751 2752 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2753 if self._match_text_seq("AND", "STATISTICS"): 2754 statistics = True 2755 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2756 statistics = False 2757 else: 2758 statistics = None 2759 2760 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2761 2762 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2763 if self._match_text_seq("SQL"): 2764 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2765 return None 2766 2767 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2768 if self._match_text_seq("SQL", "DATA"): 2769 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2770 return None 2771 2772 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2773 if self._match_text_seq("PRIMARY", "INDEX"): 2774 return exp.NoPrimaryIndexProperty() 2775 if self._match_text_seq("SQL"): 2776 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2777 return None 2778 2779 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2780 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2781 return exp.OnCommitProperty() 2782 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2783 return exp.OnCommitProperty(delete=True) 2784 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2785 2786 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2787 if self._match_text_seq("SQL", "DATA"): 2788 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2789 return None 2790 2791 def _parse_distkey(self) -> exp.DistKeyProperty: 2792 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2793 2794 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2795 table = self._parse_table(schema=True) 2796 2797 options = [] 2798 while self._match_texts(("INCLUDING", "EXCLUDING")): 2799 this = self._prev.text.upper() 2800 2801 id_var = self._parse_id_var() 2802 if not id_var: 2803 return None 2804 2805 options.append( 2806 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2807 ) 2808 2809 return self.expression(exp.LikeProperty, this=table, expressions=options) 2810 2811 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2812 return self.expression( 2813 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2814 ) 2815 2816 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2817 self._match(TokenType.EQ) 2818 return self.expression( 2819 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2820 ) 2821 2822 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2823 self._match_text_seq("WITH", "CONNECTION") 2824 return self.expression( 2825 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2826 ) 2827 2828 def _parse_returns(self) -> exp.ReturnsProperty: 2829 value: t.Optional[exp.Expression] 2830 null = None 2831 is_table = self._match(TokenType.TABLE) 2832 2833 if is_table: 2834 if self._match(TokenType.LT): 2835 value = self.expression( 2836 exp.Schema, 2837 this="TABLE", 2838 expressions=self._parse_csv(self._parse_struct_types), 2839 ) 2840 if not self._match(TokenType.GT): 2841 self.raise_error("Expecting >") 2842 else: 2843 value = self._parse_schema(exp.var("TABLE")) 2844 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2845 null = True 2846 value = None 2847 else: 2848 value = self._parse_types() 2849 2850 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2851 2852 def _parse_describe(self) -> exp.Describe: 2853 kind = self._match_set(self.CREATABLES) and self._prev.text 2854 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2855 if self._match(TokenType.DOT): 2856 style = None 2857 self._retreat(self._index - 2) 2858 2859 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2860 2861 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2862 this = self._parse_statement() 2863 else: 2864 this = self._parse_table(schema=True) 2865 2866 properties = self._parse_properties() 2867 expressions = properties.expressions if properties else None 2868 partition = self._parse_partition() 2869 return self.expression( 2870 exp.Describe, 2871 this=this, 2872 style=style, 2873 kind=kind, 2874 expressions=expressions, 2875 partition=partition, 2876 format=format, 2877 ) 2878 2879 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2880 kind = self._prev.text.upper() 2881 expressions = [] 2882 2883 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2884 if self._match(TokenType.WHEN): 2885 expression = self._parse_disjunction() 2886 self._match(TokenType.THEN) 2887 else: 2888 expression = None 2889 2890 else_ = self._match(TokenType.ELSE) 2891 2892 if not self._match(TokenType.INTO): 2893 return None 2894 2895 return self.expression( 2896 exp.ConditionalInsert, 2897 this=self.expression( 2898 exp.Insert, 2899 this=self._parse_table(schema=True), 2900 expression=self._parse_derived_table_values(), 2901 ), 2902 expression=expression, 2903 else_=else_, 2904 ) 2905 2906 expression = parse_conditional_insert() 2907 while expression is not None: 2908 expressions.append(expression) 2909 expression = parse_conditional_insert() 2910 2911 return self.expression( 2912 exp.MultitableInserts, 2913 kind=kind, 2914 comments=comments, 2915 expressions=expressions, 2916 source=self._parse_table(), 2917 ) 2918 2919 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2920 comments = [] 2921 hint = self._parse_hint() 2922 overwrite = self._match(TokenType.OVERWRITE) 2923 ignore = self._match(TokenType.IGNORE) 2924 local = self._match_text_seq("LOCAL") 2925 alternative = None 2926 is_function = None 2927 2928 if self._match_text_seq("DIRECTORY"): 2929 this: t.Optional[exp.Expression] = self.expression( 2930 exp.Directory, 2931 this=self._parse_var_or_string(), 2932 local=local, 2933 row_format=self._parse_row_format(match_row=True), 2934 ) 2935 else: 2936 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2937 comments += ensure_list(self._prev_comments) 2938 return self._parse_multitable_inserts(comments) 2939 2940 if self._match(TokenType.OR): 2941 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2942 2943 self._match(TokenType.INTO) 2944 comments += ensure_list(self._prev_comments) 2945 self._match(TokenType.TABLE) 2946 is_function = self._match(TokenType.FUNCTION) 2947 2948 this = ( 2949 self._parse_table(schema=True, parse_partition=True) 2950 if not is_function 2951 else self._parse_function() 2952 ) 2953 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2954 this.set("alias", self._parse_table_alias()) 2955 2956 returning = self._parse_returning() 2957 2958 return self.expression( 2959 
exp.Insert, 2960 comments=comments, 2961 hint=hint, 2962 is_function=is_function, 2963 this=this, 2964 stored=self._match_text_seq("STORED") and self._parse_stored(), 2965 by_name=self._match_text_seq("BY", "NAME"), 2966 exists=self._parse_exists(), 2967 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2968 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2969 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2970 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2971 conflict=self._parse_on_conflict(), 2972 returning=returning or self._parse_returning(), 2973 overwrite=overwrite, 2974 alternative=alternative, 2975 ignore=ignore, 2976 source=self._match(TokenType.TABLE) and self._parse_table(), 2977 ) 2978 2979 def _parse_kill(self) -> exp.Kill: 2980 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2981 2982 return self.expression( 2983 exp.Kill, 2984 this=self._parse_primary(), 2985 kind=kind, 2986 ) 2987 2988 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2989 conflict = self._match_text_seq("ON", "CONFLICT") 2990 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2991 2992 if not conflict and not duplicate: 2993 return None 2994 2995 conflict_keys = None 2996 constraint = None 2997 2998 if conflict: 2999 if self._match_text_seq("ON", "CONSTRAINT"): 3000 constraint = self._parse_id_var() 3001 elif self._match(TokenType.L_PAREN): 3002 conflict_keys = self._parse_csv(self._parse_id_var) 3003 self._match_r_paren() 3004 3005 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3006 if self._prev.token_type == TokenType.UPDATE: 3007 self._match(TokenType.SET) 3008 expressions = self._parse_csv(self._parse_equality) 3009 else: 3010 expressions = None 3011 3012 return self.expression( 3013 exp.OnConflict, 3014 duplicate=duplicate, 3015 expressions=expressions, 3016 action=action, 3017 conflict_keys=conflict_keys, 3018 constraint=constraint, 3019 where=self._parse_where(), 3020 ) 3021 3022 def _parse_returning(self) -> t.Optional[exp.Returning]: 3023 if not self._match(TokenType.RETURNING): 3024 return None 3025 return self.expression( 3026 exp.Returning, 3027 expressions=self._parse_csv(self._parse_expression), 3028 into=self._match(TokenType.INTO) and self._parse_table_part(), 3029 ) 3030 3031 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3032 if not self._match(TokenType.FORMAT): 3033 return None 3034 return self._parse_row_format() 3035 3036 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3037 index = self._index 3038 with_ = with_ or self._match_text_seq("WITH") 3039 3040 if not self._match(TokenType.SERDE_PROPERTIES): 3041 self._retreat(index) 3042 return None 3043 return self.expression( 3044 exp.SerdeProperties, 3045 **{ # type: ignore 3046 "expressions": self._parse_wrapped_properties(), 3047 "with": with_, 3048 }, 3049 ) 3050 3051 def _parse_row_format( 3052 self, match_row: bool = False 3053 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3054 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3055 return None 3056 3057 if self._match_text_seq("SERDE"): 3058 this = self._parse_string() 3059 3060 serde_properties = self._parse_serde_properties() 3061 3062 return self.expression( 3063 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3064 ) 3065 3066 self._match_text_seq("DELIMITED") 3067 3068 kwargs = {} 3069 3070 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3071 kwargs["fields"] = self._parse_string() 3072 if self._match_text_seq("ESCAPED", "BY"): 3073 kwargs["escaped"] = self._parse_string() 3074 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3075 kwargs["collection_items"] = self._parse_string() 3076 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3077 kwargs["map_keys"] = self._parse_string() 3078 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3079 kwargs["lines"] = self._parse_string() 3080 if self._match_text_seq("NULL", "DEFINED", "AS"): 3081 kwargs["null"] = self._parse_string() 3082 3083 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3084 3085 def _parse_load(self) -> exp.LoadData | exp.Command: 3086 if self._match_text_seq("DATA"): 3087 local = self._match_text_seq("LOCAL") 3088 self._match_text_seq("INPATH") 3089 inpath = self._parse_string() 3090 overwrite = self._match(TokenType.OVERWRITE) 3091 self._match_pair(TokenType.INTO, TokenType.TABLE) 3092 3093 return self.expression( 3094 exp.LoadData, 3095 this=self._parse_table(schema=True), 3096 local=local, 3097 overwrite=overwrite, 3098 inpath=inpath, 3099 partition=self._parse_partition(), 3100 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3101 serde=self._match_text_seq("SERDE") and self._parse_string(), 3102 ) 3103 return self._parse_as_command(self._prev) 3104 3105 def _parse_delete(self) -> exp.Delete: 3106 # This handles MySQL's "Multiple-Table Syntax" 3107 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3108 tables = None 3109 if not self._match(TokenType.FROM, advance=False): 3110 tables = self._parse_csv(self._parse_table) or None 3111 3112 returning = self._parse_returning() 3113 3114 return self.expression( 3115 exp.Delete, 3116 tables=tables, 3117 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3118 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3119 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3120 where=self._parse_where(), 3121 returning=returning or self._parse_returning(), 3122 limit=self._parse_limit(), 3123 ) 3124 3125 def _parse_update(self) -> exp.Update: 3126 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3127 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3128 returning = self._parse_returning() 3129 return self.expression( 3130 exp.Update, 3131 **{ # type: ignore 3132 "this": this, 3133 "expressions": expressions, 3134 "from": self._parse_from(joins=True), 3135 "where": self._parse_where(), 3136 "returning": returning or self._parse_returning(), 3137 "order": self._parse_order(), 3138 "limit": self._parse_limit(), 3139 }, 3140 ) 3141 3142 def _parse_use(self) -> exp.Use: 3143 return self.expression( 3144 exp.Use, 3145 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3146 this=self._parse_table(schema=False), 3147 ) 3148 3149 def _parse_uncache(self) -> exp.Uncache: 3150 if not self._match(TokenType.TABLE): 3151 self.raise_error("Expecting TABLE after UNCACHE") 3152 3153 return self.expression( 3154 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3155 ) 3156 3157 def _parse_cache(self) -> exp.Cache: 3158 lazy = self._match_text_seq("LAZY") 3159 self._match(TokenType.TABLE) 3160 table = 
self._parse_table(schema=True) 3161 3162 options = [] 3163 if self._match_text_seq("OPTIONS"): 3164 self._match_l_paren() 3165 k = self._parse_string() 3166 self._match(TokenType.EQ) 3167 v = self._parse_string() 3168 options = [k, v] 3169 self._match_r_paren() 3170 3171 self._match(TokenType.ALIAS) 3172 return self.expression( 3173 exp.Cache, 3174 this=table, 3175 lazy=lazy, 3176 options=options, 3177 expression=self._parse_select(nested=True), 3178 ) 3179 3180 def _parse_partition(self) -> t.Optional[exp.Partition]: 3181 if not self._match_texts(self.PARTITION_KEYWORDS): 3182 return None 3183 3184 return self.expression( 3185 exp.Partition, 3186 subpartition=self._prev.text.upper() == "SUBPARTITION", 3187 expressions=self._parse_wrapped_csv(self._parse_assignment), 3188 ) 3189 3190 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3191 def _parse_value_expression() -> t.Optional[exp.Expression]: 3192 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3193 return exp.var(self._prev.text.upper()) 3194 return self._parse_expression() 3195 3196 if self._match(TokenType.L_PAREN): 3197 expressions = self._parse_csv(_parse_value_expression) 3198 self._match_r_paren() 3199 return self.expression(exp.Tuple, expressions=expressions) 3200 3201 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3202 expression = self._parse_expression() 3203 if expression: 3204 return self.expression(exp.Tuple, expressions=[expression]) 3205 return None 3206 3207 def _parse_projections(self) -> t.List[exp.Expression]: 3208 return self._parse_expressions() 3209 3210 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3211 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3212 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3213 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3214 ) 3215 elif self._match(TokenType.FROM): 3216 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3217 # Support parentheses for duckdb FROM-first syntax 3218 select = self._parse_select(from_=from_) 3219 if select: 3220 if not select.args.get("from"): 3221 select.set("from", from_) 3222 this = select 3223 else: 3224 this = exp.select("*").from_(t.cast(exp.From, from_)) 3225 else: 3226 this = ( 3227 self._parse_table(consume_pipe=True) 3228 if table 3229 else self._parse_select(nested=True, parse_set_operation=False) 3230 ) 3231 3232 # Transform exp.Values into an exp.Table to pass through _parse_query_modifiers 3233 # in case a modifier (e.g.
join) follows 3234 if table and isinstance(this, exp.Values) and this.alias: 3235 alias = this.args["alias"].pop() 3236 this = exp.Table(this=this, alias=alias) 3237 3238 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3239 3240 return this 3241 3242 def _parse_select( 3243 self, 3244 nested: bool = False, 3245 table: bool = False, 3246 parse_subquery_alias: bool = True, 3247 parse_set_operation: bool = True, 3248 consume_pipe: bool = True, 3249 from_: t.Optional[exp.From] = None, 3250 ) -> t.Optional[exp.Expression]: 3251 query = self._parse_select_query( 3252 nested=nested, 3253 table=table, 3254 parse_subquery_alias=parse_subquery_alias, 3255 parse_set_operation=parse_set_operation, 3256 ) 3257 3258 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3259 if not query and from_: 3260 query = exp.select("*").from_(from_) 3261 if isinstance(query, exp.Query): 3262 query = self._parse_pipe_syntax_query(query) 3263 query = query.subquery(copy=False) if query and table else query 3264 3265 return query 3266 3267 def _parse_select_query( 3268 self, 3269 nested: bool = False, 3270 table: bool = False, 3271 parse_subquery_alias: bool = True, 3272 parse_set_operation: bool = True, 3273 ) -> t.Optional[exp.Expression]: 3274 cte = self._parse_with() 3275 3276 if cte: 3277 this = self._parse_statement() 3278 3279 if not this: 3280 self.raise_error("Failed to parse any statement following CTE") 3281 return cte 3282 3283 if "with" in this.arg_types: 3284 this.set("with", cte) 3285 else: 3286 self.raise_error(f"{this.key} does not support CTE") 3287 this = cte 3288 3289 return this 3290 3291 # duckdb supports a leading FROM clause, e.g. FROM x 3292 from_ = ( 3293 self._parse_from(consume_pipe=True) 3294 if self._match(TokenType.FROM, advance=False) 3295 else None 3296 ) 3297 3298 if self._match(TokenType.SELECT): 3299 comments = self._prev_comments 3300 3301 hint = self._parse_hint() 3302 3303 if self._next and not self._next.token_type == TokenType.DOT: 3304 all_ = self._match(TokenType.ALL) 3305 distinct = self._match_set(self.DISTINCT_TOKENS) 3306 else: 3307 all_, distinct = None, None 3308 3309 kind = ( 3310 self._match(TokenType.ALIAS) 3311 and self._match_texts(("STRUCT", "VALUE")) 3312 and self._prev.text.upper() 3313 ) 3314 3315 if distinct: 3316 distinct = self.expression( 3317 exp.Distinct, 3318 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3319 ) 3320 3321 if all_ and distinct: 3322 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3323 3324 operation_modifiers = [] 3325 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3326 operation_modifiers.append(exp.var(self._prev.text.upper())) 3327 3328 limit = self._parse_limit(top=True) 3329 projections = self._parse_projections() 3330 3331 this = self.expression( 3332 exp.Select, 3333 kind=kind, 3334 hint=hint, 3335 distinct=distinct, 3336 expressions=projections, 3337 limit=limit, 3338 operation_modifiers=operation_modifiers or None, 3339 ) 3340 this.comments = comments 3341 3342 into = self._parse_into() 3343 if into: 3344 this.set("into", into) 3345 3346 if not from_: 3347 from_ = self._parse_from() 3348 3349 if from_: 3350 this.set("from", from_) 3351 3352 this = self._parse_query_modifiers(this) 3353 elif (table or nested) and self._match(TokenType.L_PAREN): 3354 this = self._parse_wrapped_select(table=table) 3355 3356 # We return early here so that the UNION isn't attached to the subquery by the 3357 # following call to _parse_set_operations, but
instead becomes the parent node 3358 self._match_r_paren() 3359 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3360 elif self._match(TokenType.VALUES, advance=False): 3361 this = self._parse_derived_table_values() 3362 elif from_: 3363 this = exp.select("*").from_(from_.this, copy=False) 3364 elif self._match(TokenType.SUMMARIZE): 3365 table = self._match(TokenType.TABLE) 3366 this = self._parse_select() or self._parse_string() or self._parse_table() 3367 return self.expression(exp.Summarize, this=this, table=table) 3368 elif self._match(TokenType.DESCRIBE): 3369 this = self._parse_describe() 3370 elif self._match_text_seq("STREAM"): 3371 this = self._parse_function() 3372 if this: 3373 this = self.expression(exp.Stream, this=this) 3374 else: 3375 self._retreat(self._index - 1) 3376 else: 3377 this = None 3378 3379 return self._parse_set_operations(this) if parse_set_operation else this 3380 3381 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3382 self._match_text_seq("SEARCH") 3383 3384 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3385 3386 if not kind: 3387 return None 3388 3389 self._match_text_seq("FIRST", "BY") 3390 3391 return self.expression( 3392 exp.RecursiveWithSearch, 3393 kind=kind, 3394 this=self._parse_id_var(), 3395 expression=self._match_text_seq("SET") and self._parse_id_var(), 3396 using=self._match_text_seq("USING") and self._parse_id_var(), 3397 ) 3398 3399 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3400 if not skip_with_token and not self._match(TokenType.WITH): 3401 return None 3402 3403 comments = self._prev_comments 3404 recursive = self._match(TokenType.RECURSIVE) 3405 3406 last_comments = None 3407 expressions = [] 3408 while True: 3409 cte = self._parse_cte() 3410 if isinstance(cte, exp.CTE): 3411 expressions.append(cte) 3412 if last_comments: 3413 cte.add_comments(last_comments) 3414 3415 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3416 break 3417 else: 3418 self._match(TokenType.WITH) 3419 3420 last_comments = self._prev_comments 3421 3422 return self.expression( 3423 exp.With, 3424 comments=comments, 3425 expressions=expressions, 3426 recursive=recursive, 3427 search=self._parse_recursive_with_search(), 3428 ) 3429 3430 def _parse_cte(self) -> t.Optional[exp.CTE]: 3431 index = self._index 3432 3433 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3434 if not alias or not alias.this: 3435 self.raise_error("Expected CTE to have alias") 3436 3437 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3438 self._retreat(index) 3439 return None 3440 3441 comments = self._prev_comments 3442 3443 if self._match_text_seq("NOT", "MATERIALIZED"): 3444 materialized = False 3445 elif self._match_text_seq("MATERIALIZED"): 3446 materialized = True 3447 else: 3448 materialized = None 3449 3450 cte = self.expression( 3451 exp.CTE, 3452 this=self._parse_wrapped(self._parse_statement), 3453 alias=alias, 3454 materialized=materialized, 3455 comments=comments, 3456 ) 3457 3458 values = cte.this 3459 if isinstance(values, exp.Values): 3460 if values.alias: 3461 cte.set("this", exp.select("*").from_(values)) 3462 else: 3463 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3464 3465 return cte 3466 3467 def _parse_table_alias( 3468 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3469 ) -> t.Optional[exp.TableAlias]: 3470 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3471 # so this section tries to parse the clause version and if it fails, it treats the token 3472 # as an identifier (alias) 3473 if self._can_parse_limit_or_offset(): 3474 return None 3475 3476 any_token = self._match(TokenType.ALIAS) 3477 alias = ( 3478 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3479 or self._parse_string_as_identifier() 3480 ) 3481 3482 index = self._index 3483 if self._match(TokenType.L_PAREN): 3484 columns = self._parse_csv(self._parse_function_parameter) 3485 self._match_r_paren() if columns else self._retreat(index) 3486 else: 3487 columns = None 3488 3489 if not alias and not columns: 3490 return None 3491 3492 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3493 3494 # We bubble up comments from the Identifier to the TableAlias 3495 if isinstance(alias, exp.Identifier): 3496 table_alias.add_comments(alias.pop_comments()) 3497 3498 return table_alias 3499 3500 def _parse_subquery( 3501 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3502 ) -> t.Optional[exp.Subquery]: 3503 if not this: 3504 return None 3505 3506 return self.expression( 3507 exp.Subquery, 3508 this=this, 3509 pivots=self._parse_pivots(), 3510 alias=self._parse_table_alias() if parse_alias else None, 3511 sample=self._parse_table_sample(), 3512 ) 3513 3514 def _implicit_unnests_to_explicit(self, this: E) -> E: 3515 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3516 3517 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3518 for i, join in enumerate(this.args.get("joins") or []): 3519 table = join.this 3520 normalized_table = table.copy() 3521 normalized_table.meta["maybe_column"] = True 3522 normalized_table = _norm(normalized_table, dialect=self.dialect) 3523 3524 if isinstance(table, exp.Table) and not join.args.get("on"): 3525 if normalized_table.parts[0].name in refs: 3526 table_as_column = table.to_column() 3527 unnest = exp.Unnest(expressions=[table_as_column]) 3528 3529 # Table.to_column creates a parent Alias node that we want to convert to 3530 # a TableAlias and attach to the Unnest, so it matches the parser's output 3531 if isinstance(table.args.get("alias"), exp.TableAlias): 3532 table_as_column.replace(table_as_column.this) 3533 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3534 3535 table.replace(unnest) 3536 3537 refs.add(normalized_table.alias_or_name) 3538 3539 return this 3540 3541 def _parse_query_modifiers( 3542 self, this: t.Optional[exp.Expression] 3543 ) -> t.Optional[exp.Expression]: 3544 if isinstance(this, self.MODIFIABLES): 3545 for join in self._parse_joins(): 3546 this.append("joins", join) 3547 for lateral in iter(self._parse_lateral, None): 3548 this.append("laterals", lateral) 3549 3550 while True: 3551 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3552 modifier_token = self._curr 3553 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3554 key, expression = parser(self) 3555 3556 if expression: 3557 if this.args.get(key): 3558 self.raise_error( 3559 f"Found multiple '{modifier_token.text.upper()}' clauses", 3560 token=modifier_token, 3561 ) 3562 3563 this.set(key, expression) 3564 if key == "limit": 3565 offset = expression.args.pop("offset", None) 3566 3567 if offset: 3568 offset = exp.Offset(expression=offset) 3569 this.set("offset", offset) 3570 3571 limit_by_expressions = expression.expressions 3572 
expression.set("expressions", None) 3573 offset.set("expressions", limit_by_expressions) 3574 continue 3575 break 3576 3577 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3578 this = self._implicit_unnests_to_explicit(this) 3579 3580 return this 3581 3582 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3583 start = self._curr 3584 while self._curr: 3585 self._advance() 3586 3587 end = self._tokens[self._index - 1] 3588 return exp.Hint(expressions=[self._find_sql(start, end)]) 3589 3590 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3591 return self._parse_function_call() 3592 3593 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3594 start_index = self._index 3595 should_fallback_to_string = False 3596 3597 hints = [] 3598 try: 3599 for hint in iter( 3600 lambda: self._parse_csv( 3601 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3602 ), 3603 [], 3604 ): 3605 hints.extend(hint) 3606 except ParseError: 3607 should_fallback_to_string = True 3608 3609 if should_fallback_to_string or self._curr: 3610 self._retreat(start_index) 3611 return self._parse_hint_fallback_to_string() 3612 3613 return self.expression(exp.Hint, expressions=hints) 3614 3615 def _parse_hint(self) -> t.Optional[exp.Hint]: 3616 if self._match(TokenType.HINT) and self._prev_comments: 3617 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3618 3619 return None 3620 3621 def _parse_into(self) -> t.Optional[exp.Into]: 3622 if not self._match(TokenType.INTO): 3623 return None 3624 3625 temp = self._match(TokenType.TEMPORARY) 3626 unlogged = self._match_text_seq("UNLOGGED") 3627 self._match(TokenType.TABLE) 3628 3629 return self.expression( 3630 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3631 ) 3632 3633 def _parse_from( 3634 self, 3635 joins: bool = False, 3636 skip_from_token: bool = False, 3637 consume_pipe: bool = False, 3638 ) -> t.Optional[exp.From]: 3639 if not skip_from_token and not self._match(TokenType.FROM): 3640 return None 3641 3642 return self.expression( 3643 exp.From, 3644 comments=self._prev_comments, 3645 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3646 ) 3647 3648 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3649 return self.expression( 3650 exp.MatchRecognizeMeasure, 3651 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3652 this=self._parse_expression(), 3653 ) 3654 3655 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3656 if not self._match(TokenType.MATCH_RECOGNIZE): 3657 return None 3658 3659 self._match_l_paren() 3660 3661 partition = self._parse_partition_by() 3662 order = self._parse_order() 3663 3664 measures = ( 3665 self._parse_csv(self._parse_match_recognize_measure) 3666 if self._match_text_seq("MEASURES") 3667 else None 3668 ) 3669 3670 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3671 rows = exp.var("ONE ROW PER MATCH") 3672 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3673 text = "ALL ROWS PER MATCH" 3674 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3675 text += " SHOW EMPTY MATCHES" 3676 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3677 text += " OMIT EMPTY MATCHES" 3678 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3679 text += " WITH UNMATCHED ROWS" 3680 rows = exp.var(text) 3681 else: 3682 rows = None 3683 3684 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3685 text = "AFTER 
MATCH SKIP" 3686 if self._match_text_seq("PAST", "LAST", "ROW"): 3687 text += " PAST LAST ROW" 3688 elif self._match_text_seq("TO", "NEXT", "ROW"): 3689 text += " TO NEXT ROW" 3690 elif self._match_text_seq("TO", "FIRST"): 3691 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3692 elif self._match_text_seq("TO", "LAST"): 3693 text += f" TO LAST {self._advance_any().text}" # type: ignore 3694 after = exp.var(text) 3695 else: 3696 after = None 3697 3698 if self._match_text_seq("PATTERN"): 3699 self._match_l_paren() 3700 3701 if not self._curr: 3702 self.raise_error("Expecting )", self._curr) 3703 3704 paren = 1 3705 start = self._curr 3706 3707 while self._curr and paren > 0: 3708 if self._curr.token_type == TokenType.L_PAREN: 3709 paren += 1 3710 if self._curr.token_type == TokenType.R_PAREN: 3711 paren -= 1 3712 3713 end = self._prev 3714 self._advance() 3715 3716 if paren > 0: 3717 self.raise_error("Expecting )", self._curr) 3718 3719 pattern = exp.var(self._find_sql(start, end)) 3720 else: 3721 pattern = None 3722 3723 define = ( 3724 self._parse_csv(self._parse_name_as_expression) 3725 if self._match_text_seq("DEFINE") 3726 else None 3727 ) 3728 3729 self._match_r_paren() 3730 3731 return self.expression( 3732 exp.MatchRecognize, 3733 partition_by=partition, 3734 order=order, 3735 measures=measures, 3736 rows=rows, 3737 after=after, 3738 pattern=pattern, 3739 define=define, 3740 alias=self._parse_table_alias(), 3741 ) 3742 3743 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3744 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3745 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3746 cross_apply = False 3747 3748 if cross_apply is not None: 3749 this = self._parse_select(table=True) 3750 view = None 3751 outer = None 3752 elif self._match(TokenType.LATERAL): 3753 this = self._parse_select(table=True) 3754 view = self._match(TokenType.VIEW) 3755 outer = self._match(TokenType.OUTER) 3756 else: 3757 return None 3758 3759 if not this: 3760 this = ( 3761 self._parse_unnest() 3762 or self._parse_function() 3763 or self._parse_id_var(any_token=False) 3764 ) 3765 3766 while self._match(TokenType.DOT): 3767 this = exp.Dot( 3768 this=this, 3769 expression=self._parse_function() or self._parse_id_var(any_token=False), 3770 ) 3771 3772 ordinality: t.Optional[bool] = None 3773 3774 if view: 3775 table = self._parse_id_var(any_token=False) 3776 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3777 table_alias: t.Optional[exp.TableAlias] = self.expression( 3778 exp.TableAlias, this=table, columns=columns 3779 ) 3780 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3781 # We move the alias from the lateral's child node to the lateral itself 3782 table_alias = this.args["alias"].pop() 3783 else: 3784 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3785 table_alias = self._parse_table_alias() 3786 3787 return self.expression( 3788 exp.Lateral, 3789 this=this, 3790 view=view, 3791 outer=outer, 3792 alias=table_alias, 3793 cross_apply=cross_apply, 3794 ordinality=ordinality, 3795 ) 3796 3797 def _parse_join_parts( 3798 self, 3799 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3800 return ( 3801 self._match_set(self.JOIN_METHODS) and self._prev, 3802 self._match_set(self.JOIN_SIDES) and self._prev, 3803 self._match_set(self.JOIN_KINDS) and self._prev, 3804 ) 3805 3806 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3807 def 
_parse_column_as_identifier() -> t.Optional[exp.Expression]: 3808 this = self._parse_column() 3809 if isinstance(this, exp.Column): 3810 return this.this 3811 return this 3812 3813 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3814 3815 def _parse_join( 3816 self, skip_join_token: bool = False, parse_bracket: bool = False 3817 ) -> t.Optional[exp.Join]: 3818 if self._match(TokenType.COMMA): 3819 table = self._try_parse(self._parse_table) 3820 cross_join = self.expression(exp.Join, this=table) if table else None 3821 3822 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3823 cross_join.set("kind", "CROSS") 3824 3825 return cross_join 3826 3827 index = self._index 3828 method, side, kind = self._parse_join_parts() 3829 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3830 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3831 join_comments = self._prev_comments 3832 3833 if not skip_join_token and not join: 3834 self._retreat(index) 3835 kind = None 3836 method = None 3837 side = None 3838 3839 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3840 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3841 3842 if not skip_join_token and not join and not outer_apply and not cross_apply: 3843 return None 3844 3845 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3846 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3847 kwargs["expressions"] = self._parse_csv( 3848 lambda: self._parse_table(parse_bracket=parse_bracket) 3849 ) 3850 3851 if method: 3852 kwargs["method"] = method.text 3853 if side: 3854 kwargs["side"] = side.text 3855 if kind: 3856 kwargs["kind"] = kind.text 3857 if hint: 3858 kwargs["hint"] = hint 3859 3860 if self._match(TokenType.MATCH_CONDITION): 3861 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3862 3863 if self._match(TokenType.ON): 3864 kwargs["on"] = self._parse_assignment() 3865 elif self._match(TokenType.USING): 3866 kwargs["using"] = self._parse_using_identifiers() 3867 elif ( 3868 not method 3869 and not (outer_apply or cross_apply) 3870 and not isinstance(kwargs["this"], exp.Unnest) 3871 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3872 ): 3873 index = self._index 3874 joins: t.Optional[list] = list(self._parse_joins()) 3875 3876 if joins and self._match(TokenType.ON): 3877 kwargs["on"] = self._parse_assignment() 3878 elif joins and self._match(TokenType.USING): 3879 kwargs["using"] = self._parse_using_identifiers() 3880 else: 3881 joins = None 3882 self._retreat(index) 3883 3884 kwargs["this"].set("joins", joins if joins else None) 3885 3886 kwargs["pivots"] = self._parse_pivots() 3887 3888 comments = [c for token in (method, side, kind) if token for c in token.comments] 3889 comments = (join_comments or []) + comments 3890 3891 if ( 3892 self.ADD_JOIN_ON_TRUE 3893 and not kwargs.get("on") 3894 and not kwargs.get("using") 3895 and not kwargs.get("method") 3896 and kwargs.get("kind") in (None, "INNER", "OUTER") 3897 ): 3898 kwargs["on"] = exp.true() 3899 3900 return self.expression(exp.Join, comments=comments, **kwargs) 3901 3902 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3903 this = self._parse_assignment() 3904 3905 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3906 return this 3907 3908 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3909 return 
self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3910 3911 return this 3912 3913 def _parse_index_params(self) -> exp.IndexParameters: 3914 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3915 3916 if self._match(TokenType.L_PAREN, advance=False): 3917 columns = self._parse_wrapped_csv(self._parse_with_operator) 3918 else: 3919 columns = None 3920 3921 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3922 partition_by = self._parse_partition_by() 3923 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3924 tablespace = ( 3925 self._parse_var(any_token=True) 3926 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3927 else None 3928 ) 3929 where = self._parse_where() 3930 3931 on = self._parse_field() if self._match(TokenType.ON) else None 3932 3933 return self.expression( 3934 exp.IndexParameters, 3935 using=using, 3936 columns=columns, 3937 include=include, 3938 partition_by=partition_by, 3939 where=where, 3940 with_storage=with_storage, 3941 tablespace=tablespace, 3942 on=on, 3943 ) 3944 3945 def _parse_index( 3946 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3947 ) -> t.Optional[exp.Index]: 3948 if index or anonymous: 3949 unique = None 3950 primary = None 3951 amp = None 3952 3953 self._match(TokenType.ON) 3954 self._match(TokenType.TABLE) # hive 3955 table = self._parse_table_parts(schema=True) 3956 else: 3957 unique = self._match(TokenType.UNIQUE) 3958 primary = self._match_text_seq("PRIMARY") 3959 amp = self._match_text_seq("AMP") 3960 3961 if not self._match(TokenType.INDEX): 3962 return None 3963 3964 index = self._parse_id_var() 3965 table = None 3966 3967 params = self._parse_index_params() 3968 3969 return self.expression( 3970 exp.Index, 3971 this=index, 3972 table=table, 3973 unique=unique, 3974 primary=primary, 3975 amp=amp, 3976 params=params, 3977 ) 3978 3979 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3980 hints: t.List[exp.Expression] = [] 3981 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3982 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3983 hints.append( 3984 self.expression( 3985 exp.WithTableHint, 3986 expressions=self._parse_csv( 3987 lambda: self._parse_function() or self._parse_var(any_token=True) 3988 ), 3989 ) 3990 ) 3991 self._match_r_paren() 3992 else: 3993 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3994 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3995 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3996 3997 self._match_set((TokenType.INDEX, TokenType.KEY)) 3998 if self._match(TokenType.FOR): 3999 hint.set("target", self._advance_any() and self._prev.text.upper()) 4000 4001 hint.set("expressions", self._parse_wrapped_id_vars()) 4002 hints.append(hint) 4003 4004 return hints or None 4005 4006 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4007 return ( 4008 (not schema and self._parse_function(optional_parens=False)) 4009 or self._parse_id_var(any_token=False) 4010 or self._parse_string_as_identifier() 4011 or self._parse_placeholder() 4012 ) 4013 4014 def _parse_table_parts( 4015 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4016 ) -> exp.Table: 4017 catalog = None 4018 db = None 4019 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4020 4021 while self._match(TokenType.DOT): 4022 
if catalog: 4023 # This allows nesting the table in arbitrarily many dot expressions if needed 4024 table = self.expression( 4025 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4026 ) 4027 else: 4028 catalog = db 4029 db = table 4030 # "" used for tsql FROM a..b case 4031 table = self._parse_table_part(schema=schema) or "" 4032 4033 if ( 4034 wildcard 4035 and self._is_connected() 4036 and (isinstance(table, exp.Identifier) or not table) 4037 and self._match(TokenType.STAR) 4038 ): 4039 if isinstance(table, exp.Identifier): 4040 table.args["this"] += "*" 4041 else: 4042 table = exp.Identifier(this="*") 4043 4044 # We bubble up comments from the Identifier to the Table 4045 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4046 4047 if is_db_reference: 4048 catalog = db 4049 db = table 4050 table = None 4051 4052 if not table and not is_db_reference: 4053 self.raise_error(f"Expected table name but got {self._curr}") 4054 if not db and is_db_reference: 4055 self.raise_error(f"Expected database name but got {self._curr}") 4056 4057 table = self.expression( 4058 exp.Table, 4059 comments=comments, 4060 this=table, 4061 db=db, 4062 catalog=catalog, 4063 ) 4064 4065 changes = self._parse_changes() 4066 if changes: 4067 table.set("changes", changes) 4068 4069 at_before = self._parse_historical_data() 4070 if at_before: 4071 table.set("when", at_before) 4072 4073 pivots = self._parse_pivots() 4074 if pivots: 4075 table.set("pivots", pivots) 4076 4077 return table 4078 4079 def _parse_table( 4080 self, 4081 schema: bool = False, 4082 joins: bool = False, 4083 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4084 parse_bracket: bool = False, 4085 is_db_reference: bool = False, 4086 parse_partition: bool = False, 4087 consume_pipe: bool = False, 4088 ) -> t.Optional[exp.Expression]: 4089 lateral = self._parse_lateral() 4090 if lateral: 4091 return lateral 4092 4093 unnest = self._parse_unnest() 4094 if unnest: 4095 return unnest 4096 4097 values = self._parse_derived_table_values() 4098 if values: 4099 return values 4100 4101 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4102 if subquery: 4103 if not subquery.args.get("pivots"): 4104 subquery.set("pivots", self._parse_pivots()) 4105 return subquery 4106 4107 bracket = parse_bracket and self._parse_bracket(None) 4108 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4109 4110 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4111 self._parse_table 4112 ) 4113 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4114 4115 only = self._match(TokenType.ONLY) 4116 4117 this = t.cast( 4118 exp.Expression, 4119 bracket 4120 or rows_from 4121 or self._parse_bracket( 4122 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4123 ), 4124 ) 4125 4126 if only: 4127 this.set("only", only) 4128 4129 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4130 self._match_text_seq("*") 4131 4132 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4133 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4134 this.set("partition", self._parse_partition()) 4135 4136 if schema: 4137 return self._parse_schema(this=this) 4138 4139 version = self._parse_version() 4140 4141 if version: 4142 this.set("version", version) 4143 4144 if self.dialect.ALIAS_POST_TABLESAMPLE: 4145 this.set("sample", self._parse_table_sample()) 4146 
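# Illustrative sketch (doctest-style), not part of the upstream source, assuming
# the default dialect: _parse_table_parts above resolves dotted references
# right-to-left, so the last part becomes the table, the parts before it the db
# and catalog, and any further dots nest into exp.Dot wrappers.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
#   >>> tbl.catalog, tbl.db, tbl.name
#   ('c', 'd', 't')
#
# Per the comment above, the T-SQL shorthand FROM a..b fills the skipped db
# part with an empty string.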
4147 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4148 if alias: 4149 this.set("alias", alias) 4150 4151 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4152 return self.expression( 4153 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4154 ) 4155 4156 this.set("hints", self._parse_table_hints()) 4157 4158 if not this.args.get("pivots"): 4159 this.set("pivots", self._parse_pivots()) 4160 4161 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4162 this.set("sample", self._parse_table_sample()) 4163 4164 if joins: 4165 for join in self._parse_joins(): 4166 this.append("joins", join) 4167 4168 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4169 this.set("ordinality", True) 4170 this.set("alias", self._parse_table_alias()) 4171 4172 return this 4173 4174 def _parse_version(self) -> t.Optional[exp.Version]: 4175 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4176 this = "TIMESTAMP" 4177 elif self._match(TokenType.VERSION_SNAPSHOT): 4178 this = "VERSION" 4179 else: 4180 return None 4181 4182 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4183 kind = self._prev.text.upper() 4184 start = self._parse_bitwise() 4185 self._match_texts(("TO", "AND")) 4186 end = self._parse_bitwise() 4187 expression: t.Optional[exp.Expression] = self.expression( 4188 exp.Tuple, expressions=[start, end] 4189 ) 4190 elif self._match_text_seq("CONTAINED", "IN"): 4191 kind = "CONTAINED IN" 4192 expression = self.expression( 4193 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4194 ) 4195 elif self._match(TokenType.ALL): 4196 kind = "ALL" 4197 expression = None 4198 else: 4199 self._match_text_seq("AS", "OF") 4200 kind = "AS OF" 4201 expression = self._parse_type() 4202 4203 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4204 4205 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4206 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4207 index = self._index 4208 historical_data = None 4209 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4210 this = self._prev.text.upper() 4211 kind = ( 4212 self._match(TokenType.L_PAREN) 4213 and self._match_texts(self.HISTORICAL_DATA_KIND) 4214 and self._prev.text.upper() 4215 ) 4216 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4217 4218 if expression: 4219 self._match_r_paren() 4220 historical_data = self.expression( 4221 exp.HistoricalData, this=this, kind=kind, expression=expression 4222 ) 4223 else: 4224 self._retreat(index) 4225 4226 return historical_data 4227 4228 def _parse_changes(self) -> t.Optional[exp.Changes]: 4229 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4230 return None 4231 4232 information = self._parse_var(any_token=True) 4233 self._match_r_paren() 4234 4235 return self.expression( 4236 exp.Changes, 4237 information=information, 4238 at_before=self._parse_historical_data(), 4239 end=self._parse_historical_data(), 4240 ) 4241 4242 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4243 if not self._match(TokenType.UNNEST): 4244 return None 4245 4246 expressions = self._parse_wrapped_csv(self._parse_equality) 4247 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4248 4249 alias = self._parse_table_alias() if with_alias else None 4250 4251 if alias: 4252 if self.dialect.UNNEST_COLUMN_ONLY: 4253 if alias.args.get("columns"): 4254 self.raise_error("Unexpected extra column alias in unnest.") 4255 4256 
alias.set("columns", [alias.this]) 4257 alias.set("this", None) 4258 4259 columns = alias.args.get("columns") or [] 4260 if offset and len(expressions) < len(columns): 4261 offset = columns.pop() 4262 4263 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4264 self._match(TokenType.ALIAS) 4265 offset = self._parse_id_var( 4266 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4267 ) or exp.to_identifier("offset") 4268 4269 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4270 4271 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4272 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4273 if not is_derived and not ( 4274 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4275 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4276 ): 4277 return None 4278 4279 expressions = self._parse_csv(self._parse_value) 4280 alias = self._parse_table_alias() 4281 4282 if is_derived: 4283 self._match_r_paren() 4284 4285 return self.expression( 4286 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4287 ) 4288 4289 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4290 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4291 as_modifier and self._match_text_seq("USING", "SAMPLE") 4292 ): 4293 return None 4294 4295 bucket_numerator = None 4296 bucket_denominator = None 4297 bucket_field = None 4298 percent = None 4299 size = None 4300 seed = None 4301 4302 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4303 matched_l_paren = self._match(TokenType.L_PAREN) 4304 4305 if self.TABLESAMPLE_CSV: 4306 num = None 4307 expressions = self._parse_csv(self._parse_primary) 4308 else: 4309 expressions = None 4310 num = ( 4311 self._parse_factor() 4312 if self._match(TokenType.NUMBER, advance=False) 4313 else self._parse_primary() or self._parse_placeholder() 4314 ) 4315 4316 if self._match_text_seq("BUCKET"): 4317 bucket_numerator = self._parse_number() 4318 self._match_text_seq("OUT", "OF") 4319 bucket_denominator = bucket_denominator = self._parse_number() 4320 self._match(TokenType.ON) 4321 bucket_field = self._parse_field() 4322 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4323 percent = num 4324 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4325 size = num 4326 else: 4327 percent = num 4328 4329 if matched_l_paren: 4330 self._match_r_paren() 4331 4332 if self._match(TokenType.L_PAREN): 4333 method = self._parse_var(upper=True) 4334 seed = self._match(TokenType.COMMA) and self._parse_number() 4335 self._match_r_paren() 4336 elif self._match_texts(("SEED", "REPEATABLE")): 4337 seed = self._parse_wrapped(self._parse_number) 4338 4339 if not method and self.DEFAULT_SAMPLING_METHOD: 4340 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4341 4342 return self.expression( 4343 exp.TableSample, 4344 expressions=expressions, 4345 method=method, 4346 bucket_numerator=bucket_numerator, 4347 bucket_denominator=bucket_denominator, 4348 bucket_field=bucket_field, 4349 percent=percent, 4350 size=size, 4351 seed=seed, 4352 ) 4353 4354 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4355 return list(iter(self._parse_pivot, None)) or None 4356 4357 def _parse_joins(self) -> t.Iterator[exp.Join]: 4358 return iter(self._parse_join, None) 4359 4360 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4361 if not self._match(TokenType.INTO): 4362 return None 
4363 4364 return self.expression( 4365 exp.UnpivotColumns, 4366 this=self._match_text_seq("NAME") and self._parse_column(), 4367 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4368 ) 4369 4370 # https://duckdb.org/docs/sql/statements/pivot 4371 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4372 def _parse_on() -> t.Optional[exp.Expression]: 4373 this = self._parse_bitwise() 4374 4375 if self._match(TokenType.IN): 4376 # PIVOT ... ON col IN (row_val1, row_val2) 4377 return self._parse_in(this) 4378 if self._match(TokenType.ALIAS, advance=False): 4379 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4380 return self._parse_alias(this) 4381 4382 return this 4383 4384 this = self._parse_table() 4385 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4386 into = self._parse_unpivot_columns() 4387 using = self._match(TokenType.USING) and self._parse_csv( 4388 lambda: self._parse_alias(self._parse_function()) 4389 ) 4390 group = self._parse_group() 4391 4392 return self.expression( 4393 exp.Pivot, 4394 this=this, 4395 expressions=expressions, 4396 using=using, 4397 group=group, 4398 unpivot=is_unpivot, 4399 into=into, 4400 ) 4401 4402 def _parse_pivot_in(self) -> exp.In: 4403 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4404 this = self._parse_select_or_expression() 4405 4406 self._match(TokenType.ALIAS) 4407 alias = self._parse_bitwise() 4408 if alias: 4409 if isinstance(alias, exp.Column) and not alias.db: 4410 alias = alias.this 4411 return self.expression(exp.PivotAlias, this=this, alias=alias) 4412 4413 return this 4414 4415 value = self._parse_column() 4416 4417 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4418 self.raise_error("Expecting IN (") 4419 4420 if self._match(TokenType.ANY): 4421 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4422 else: 4423 exprs = self._parse_csv(_parse_aliased_expression) 4424 4425 self._match_r_paren() 4426 return self.expression(exp.In, this=value, expressions=exprs) 4427 4428 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4429 func = self._parse_function() 4430 if not func: 4431 if self._prev and self._prev.token_type == TokenType.COMMA: 4432 return None 4433 self.raise_error("Expecting an aggregation function in PIVOT") 4434 4435 return self._parse_alias(func) 4436 4437 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4438 index = self._index 4439 include_nulls = None 4440 4441 if self._match(TokenType.PIVOT): 4442 unpivot = False 4443 elif self._match(TokenType.UNPIVOT): 4444 unpivot = True 4445 4446 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4447 if self._match_text_seq("INCLUDE", "NULLS"): 4448 include_nulls = True 4449 elif self._match_text_seq("EXCLUDE", "NULLS"): 4450 include_nulls = False 4451 else: 4452 return None 4453 4454 expressions = [] 4455 4456 if not self._match(TokenType.L_PAREN): 4457 self._retreat(index) 4458 return None 4459 4460 if unpivot: 4461 expressions = self._parse_csv(self._parse_column) 4462 else: 4463 expressions = self._parse_csv(self._parse_pivot_aggregation) 4464 4465 if not expressions: 4466 self.raise_error("Failed to parse PIVOT's aggregation list") 4467 4468 if not self._match(TokenType.FOR): 4469 self.raise_error("Expecting FOR") 4470 4471 fields = [] 4472 while True: 4473 field = self._try_parse(self._parse_pivot_in) 4474 if not field: 4475 break 4476 fields.append(field) 4477 4478 
default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4479 self._parse_bitwise 4480 ) 4481 4482 group = self._parse_group() 4483 4484 self._match_r_paren() 4485 4486 pivot = self.expression( 4487 exp.Pivot, 4488 expressions=expressions, 4489 fields=fields, 4490 unpivot=unpivot, 4491 include_nulls=include_nulls, 4492 default_on_null=default_on_null, 4493 group=group, 4494 ) 4495 4496 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4497 pivot.set("alias", self._parse_table_alias()) 4498 4499 if not unpivot: 4500 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4501 4502 columns: t.List[exp.Expression] = [] 4503 all_fields = [] 4504 for pivot_field in pivot.fields: 4505 pivot_field_expressions = pivot_field.expressions 4506 4507 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4508 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4509 continue 4510 4511 all_fields.append( 4512 [ 4513 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4514 for fld in pivot_field_expressions 4515 ] 4516 ) 4517 4518 if all_fields: 4519 if names: 4520 all_fields.append(names) 4521 4522 # Generate all possible combinations of the pivot columns 4523 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4524 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4525 for fld_parts_tuple in itertools.product(*all_fields): 4526 fld_parts = list(fld_parts_tuple) 4527 4528 if names and self.PREFIXED_PIVOT_COLUMNS: 4529 # Move the "name" to the front of the list 4530 fld_parts.insert(0, fld_parts.pop(-1)) 4531 4532 columns.append(exp.to_identifier("_".join(fld_parts))) 4533 4534 pivot.set("columns", columns) 4535 4536 return pivot 4537 4538 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4539 return [agg.alias for agg in aggregations if agg.alias] 4540 4541 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4542 if not skip_where_token and not self._match(TokenType.PREWHERE): 4543 return None 4544 4545 return self.expression( 4546 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4547 ) 4548 4549 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4550 if not skip_where_token and not self._match(TokenType.WHERE): 4551 return None 4552 4553 return self.expression( 4554 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4555 ) 4556 4557 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4558 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4559 return None 4560 comments = self._prev_comments 4561 4562 elements: t.Dict[str, t.Any] = defaultdict(list) 4563 4564 if self._match(TokenType.ALL): 4565 elements["all"] = True 4566 elif self._match(TokenType.DISTINCT): 4567 elements["all"] = False 4568 4569 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4570 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4571 4572 while True: 4573 index = self._index 4574 4575 elements["expressions"].extend( 4576 self._parse_csv( 4577 lambda: None 4578 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4579 else self._parse_assignment() 4580 ) 4581 ) 4582 4583 before_with_index = self._index 4584 with_prefix = self._match(TokenType.WITH) 4585 4586 if 
self._match(TokenType.ROLLUP): 4587 elements["rollup"].append( 4588 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4589 ) 4590 elif self._match(TokenType.CUBE): 4591 elements["cube"].append( 4592 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4593 ) 4594 elif self._match(TokenType.GROUPING_SETS): 4595 elements["grouping_sets"].append( 4596 self.expression( 4597 exp.GroupingSets, 4598 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4599 ) 4600 ) 4601 elif self._match_text_seq("TOTALS"): 4602 elements["totals"] = True # type: ignore 4603 4604 if before_with_index <= self._index <= before_with_index + 1: 4605 self._retreat(before_with_index) 4606 break 4607 4608 if index == self._index: 4609 break 4610 4611 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4612 4613 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4614 return self.expression( 4615 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4616 ) 4617 4618 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4619 if self._match(TokenType.L_PAREN): 4620 grouping_set = self._parse_csv(self._parse_bitwise) 4621 self._match_r_paren() 4622 return self.expression(exp.Tuple, expressions=grouping_set) 4623 4624 return self._parse_column() 4625 4626 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4627 if not skip_having_token and not self._match(TokenType.HAVING): 4628 return None 4629 return self.expression( 4630 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4631 ) 4632 4633 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4634 if not self._match(TokenType.QUALIFY): 4635 return None 4636 return self.expression(exp.Qualify, this=self._parse_assignment()) 4637 4638 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4639 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4640 exp.Prior, this=self._parse_bitwise() 4641 ) 4642 connect = self._parse_assignment() 4643 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4644 return connect 4645 4646 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4647 if skip_start_token: 4648 start = None 4649 elif self._match(TokenType.START_WITH): 4650 start = self._parse_assignment() 4651 else: 4652 return None 4653 4654 self._match(TokenType.CONNECT_BY) 4655 nocycle = self._match_text_seq("NOCYCLE") 4656 connect = self._parse_connect_with_prior() 4657 4658 if not start and self._match(TokenType.START_WITH): 4659 start = self._parse_assignment() 4660 4661 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4662 4663 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4664 this = self._parse_id_var(any_token=True) 4665 if self._match(TokenType.ALIAS): 4666 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4667 return this 4668 4669 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4670 if self._match_text_seq("INTERPOLATE"): 4671 return self._parse_wrapped_csv(self._parse_name_as_expression) 4672 return None 4673 4674 def _parse_order( 4675 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4676 ) -> t.Optional[exp.Expression]: 4677 siblings = None 4678 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4679 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4680 return this 4681 4682 siblings = True 4683 
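# Illustrative sketch (doctest-style), not part of the upstream source, assuming
# the default dialect: _parse_group above records GROUP BY ALL / DISTINCT in the
# Group expression's `all` flag before collecting plain expressions, rollups,
# cubes, and grouping sets.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> q = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ALL")
#   >>> q.find(exp.Group).args.get("all")
#   True
#   >>> q.sql()
#   'SELECT a, SUM(b) FROM t GROUP BY ALL'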
4684 return self.expression( 4685 exp.Order, 4686 comments=self._prev_comments, 4687 this=this, 4688 expressions=self._parse_csv(self._parse_ordered), 4689 siblings=siblings, 4690 ) 4691 4692 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4693 if not self._match(token): 4694 return None 4695 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4696 4697 def _parse_ordered( 4698 self, parse_method: t.Optional[t.Callable] = None 4699 ) -> t.Optional[exp.Ordered]: 4700 this = parse_method() if parse_method else self._parse_assignment() 4701 if not this: 4702 return None 4703 4704 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4705 this = exp.var("ALL") 4706 4707 asc = self._match(TokenType.ASC) 4708 desc = self._match(TokenType.DESC) or (asc and False) 4709 4710 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4711 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4712 4713 nulls_first = is_nulls_first or False 4714 explicitly_null_ordered = is_nulls_first or is_nulls_last 4715 4716 if ( 4717 not explicitly_null_ordered 4718 and ( 4719 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4720 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4721 ) 4722 and self.dialect.NULL_ORDERING != "nulls_are_last" 4723 ): 4724 nulls_first = True 4725 4726 if self._match_text_seq("WITH", "FILL"): 4727 with_fill = self.expression( 4728 exp.WithFill, 4729 **{ # type: ignore 4730 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4731 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4732 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4733 "interpolate": self._parse_interpolate(), 4734 }, 4735 ) 4736 else: 4737 with_fill = None 4738 4739 return self.expression( 4740 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4741 ) 4742 4743 def _parse_limit_options(self) -> exp.LimitOptions: 4744 percent = self._match(TokenType.PERCENT) 4745 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4746 self._match_text_seq("ONLY") 4747 with_ties = self._match_text_seq("WITH", "TIES") 4748 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4749 4750 def _parse_limit( 4751 self, 4752 this: t.Optional[exp.Expression] = None, 4753 top: bool = False, 4754 skip_limit_token: bool = False, 4755 ) -> t.Optional[exp.Expression]: 4756 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4757 comments = self._prev_comments 4758 if top: 4759 limit_paren = self._match(TokenType.L_PAREN) 4760 expression = self._parse_term() if limit_paren else self._parse_number() 4761 4762 if limit_paren: 4763 self._match_r_paren() 4764 4765 limit_options = self._parse_limit_options() 4766 else: 4767 limit_options = None 4768 expression = self._parse_term() 4769 4770 if self._match(TokenType.COMMA): 4771 offset = expression 4772 expression = self._parse_term() 4773 else: 4774 offset = None 4775 4776 limit_exp = self.expression( 4777 exp.Limit, 4778 this=this, 4779 expression=expression, 4780 offset=offset, 4781 comments=comments, 4782 limit_options=limit_options, 4783 expressions=self._parse_limit_by(), 4784 ) 4785 4786 return limit_exp 4787 4788 if self._match(TokenType.FETCH): 4789 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4790 direction = self._prev.text.upper() if direction else "FIRST" 4791 4792 count = self._parse_field(tokens=self.FETCH_TOKENS) 4793 4794 return 
self.expression( 4795 exp.Fetch, 4796 direction=direction, 4797 count=count, 4798 limit_options=self._parse_limit_options(), 4799 ) 4800 4801 return this 4802 4803 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4804 if not self._match(TokenType.OFFSET): 4805 return this 4806 4807 count = self._parse_term() 4808 self._match_set((TokenType.ROW, TokenType.ROWS)) 4809 4810 return self.expression( 4811 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4812 ) 4813 4814 def _can_parse_limit_or_offset(self) -> bool: 4815 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4816 return False 4817 4818 index = self._index 4819 result = bool( 4820 self._try_parse(self._parse_limit, retreat=True) 4821 or self._try_parse(self._parse_offset, retreat=True) 4822 ) 4823 self._retreat(index) 4824 return result 4825 4826 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4827 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4828 4829 def _parse_locks(self) -> t.List[exp.Lock]: 4830 locks = [] 4831 while True: 4832 update, key = None, None 4833 if self._match_text_seq("FOR", "UPDATE"): 4834 update = True 4835 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4836 "LOCK", "IN", "SHARE", "MODE" 4837 ): 4838 update = False 4839 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4840 update, key = False, True 4841 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4842 update, key = True, True 4843 else: 4844 break 4845 4846 expressions = None 4847 if self._match_text_seq("OF"): 4848 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4849 4850 wait: t.Optional[bool | exp.Expression] = None 4851 if self._match_text_seq("NOWAIT"): 4852 wait = True 4853 elif self._match_text_seq("WAIT"): 4854 wait = self._parse_primary() 4855 elif self._match_text_seq("SKIP", "LOCKED"): 4856 wait = False 4857 4858 locks.append( 4859 self.expression( 4860 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4861 ) 4862 ) 4863 4864 return locks 4865 4866 def parse_set_operation( 4867 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4868 ) -> t.Optional[exp.Expression]: 4869 start = self._index 4870 _, side_token, kind_token = self._parse_join_parts() 4871 4872 side = side_token.text if side_token else None 4873 kind = kind_token.text if kind_token else None 4874 4875 if not self._match_set(self.SET_OPERATIONS): 4876 self._retreat(start) 4877 return None 4878 4879 token_type = self._prev.token_type 4880 4881 if token_type == TokenType.UNION: 4882 operation: t.Type[exp.SetOperation] = exp.Union 4883 elif token_type == TokenType.EXCEPT: 4884 operation = exp.Except 4885 else: 4886 operation = exp.Intersect 4887 4888 comments = self._prev.comments 4889 4890 if self._match(TokenType.DISTINCT): 4891 distinct: t.Optional[bool] = True 4892 elif self._match(TokenType.ALL): 4893 distinct = False 4894 else: 4895 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4896 if distinct is None: 4897 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4898 4899 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4900 "STRICT", "CORRESPONDING" 4901 ) 4902 if self._match_text_seq("CORRESPONDING"): 4903 by_name = True 4904 if not side and not kind: 4905 kind = "INNER" 4906 4907 on_column_list = None 4908 if by_name and self._match_texts(("ON", "BY")): 4909 on_column_list = 
self._parse_wrapped_csv(self._parse_column) 4910 4911 expression = self._parse_select( 4912 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4913 ) 4914 4915 return self.expression( 4916 operation, 4917 comments=comments, 4918 this=this, 4919 distinct=distinct, 4920 by_name=by_name, 4921 expression=expression, 4922 side=side, 4923 kind=kind, 4924 on=on_column_list, 4925 ) 4926 4927 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4928 while this: 4929 setop = self.parse_set_operation(this) 4930 if not setop: 4931 break 4932 this = setop 4933 4934 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4935 expression = this.expression 4936 4937 if expression: 4938 for arg in self.SET_OP_MODIFIERS: 4939 expr = expression.args.get(arg) 4940 if expr: 4941 this.set(arg, expr.pop()) 4942 4943 return this 4944 4945 def _parse_expression(self) -> t.Optional[exp.Expression]: 4946 return self._parse_alias(self._parse_assignment()) 4947 4948 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4949 this = self._parse_disjunction() 4950 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4951 # This allows us to parse <non-identifier token> := <expr> 4952 this = exp.column( 4953 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4954 ) 4955 4956 while self._match_set(self.ASSIGNMENT): 4957 if isinstance(this, exp.Column) and len(this.parts) == 1: 4958 this = this.this 4959 4960 this = self.expression( 4961 self.ASSIGNMENT[self._prev.token_type], 4962 this=this, 4963 comments=self._prev_comments, 4964 expression=self._parse_assignment(), 4965 ) 4966 4967 return this 4968 4969 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4970 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4971 4972 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4973 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4974 4975 def _parse_equality(self) -> t.Optional[exp.Expression]: 4976 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4977 4978 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4979 return self._parse_tokens(self._parse_range, self.COMPARISON) 4980 4981 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4982 this = this or self._parse_bitwise() 4983 negate = self._match(TokenType.NOT) 4984 4985 if self._match_set(self.RANGE_PARSERS): 4986 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4987 if not expression: 4988 return this 4989 4990 this = expression 4991 elif self._match(TokenType.ISNULL): 4992 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4993 4994 # Postgres supports ISNULL and NOTNULL for conditions. 
4995 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4996 if self._match(TokenType.NOTNULL): 4997 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4998 this = self.expression(exp.Not, this=this) 4999 5000 if negate: 5001 this = self._negate_range(this) 5002 5003 if self._match(TokenType.IS): 5004 this = self._parse_is(this) 5005 5006 return this 5007 5008 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5009 if not this: 5010 return this 5011 5012 return self.expression(exp.Not, this=this) 5013 5014 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5015 index = self._index - 1 5016 negate = self._match(TokenType.NOT) 5017 5018 if self._match_text_seq("DISTINCT", "FROM"): 5019 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5020 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5021 5022 if self._match(TokenType.JSON): 5023 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5024 5025 if self._match_text_seq("WITH"): 5026 _with = True 5027 elif self._match_text_seq("WITHOUT"): 5028 _with = False 5029 else: 5030 _with = None 5031 5032 unique = self._match(TokenType.UNIQUE) 5033 self._match_text_seq("KEYS") 5034 expression: t.Optional[exp.Expression] = self.expression( 5035 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5036 ) 5037 else: 5038 expression = self._parse_primary() or self._parse_null() 5039 if not expression: 5040 self._retreat(index) 5041 return None 5042 5043 this = self.expression(exp.Is, this=this, expression=expression) 5044 return self.expression(exp.Not, this=this) if negate else this 5045 5046 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5047 unnest = self._parse_unnest(with_alias=False) 5048 if unnest: 5049 this = self.expression(exp.In, this=this, unnest=unnest) 5050 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5051 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5052 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5053 5054 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5055 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5056 else: 5057 this = self.expression(exp.In, this=this, expressions=expressions) 5058 5059 if matched_l_paren: 5060 self._match_r_paren(this) 5061 elif not self._match(TokenType.R_BRACKET, expression=this): 5062 self.raise_error("Expecting ]") 5063 else: 5064 this = self.expression(exp.In, this=this, field=self._parse_column()) 5065 5066 return this 5067 5068 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5069 symmetric = None 5070 if self._match_text_seq("SYMMETRIC"): 5071 symmetric = True 5072 elif self._match_text_seq("ASYMMETRIC"): 5073 symmetric = False 5074 5075 low = self._parse_bitwise() 5076 self._match(TokenType.AND) 5077 high = self._parse_bitwise() 5078 5079 return self.expression( 5080 exp.Between, 5081 this=this, 5082 low=low, 5083 high=high, 5084 symmetric=symmetric, 5085 ) 5086 5087 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5088 if not self._match(TokenType.ESCAPE): 5089 return this 5090 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5091 5092 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5093 index = self._index 5094 5095 if not 
self._match(TokenType.INTERVAL) and match_interval: 5096 return None 5097 5098 if self._match(TokenType.STRING, advance=False): 5099 this = self._parse_primary() 5100 else: 5101 this = self._parse_term() 5102 5103 if not this or ( 5104 isinstance(this, exp.Column) 5105 and not this.table 5106 and not this.this.quoted 5107 and this.name.upper() in ("IS", "ROWS") 5108 ): 5109 self._retreat(index) 5110 return None 5111 5112 # handle day-time format interval span with omitted units: 5113 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5114 interval_span_units_omitted = None 5115 if ( 5116 this 5117 and this.is_string 5118 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5119 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5120 ): 5121 index = self._index 5122 5123 # Var "TO" Var 5124 first_unit = self._parse_var(any_token=True, upper=True) 5125 second_unit = None 5126 if first_unit and self._match_text_seq("TO"): 5127 second_unit = self._parse_var(any_token=True, upper=True) 5128 5129 interval_span_units_omitted = not (first_unit and second_unit) 5130 5131 self._retreat(index) 5132 5133 unit = ( 5134 None 5135 if interval_span_units_omitted 5136 else ( 5137 self._parse_function() 5138 or ( 5139 not self._match(TokenType.ALIAS, advance=False) 5140 and self._parse_var(any_token=True, upper=True) 5141 ) 5142 ) 5143 ) 5144 5145 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5146 # each INTERVAL expression into this canonical form so it's easy to transpile 5147 if this and this.is_number: 5148 this = exp.Literal.string(this.to_py()) 5149 elif this and this.is_string: 5150 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5151 if parts and unit: 5152 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5153 unit = None 5154 self._retreat(self._index - 1) 5155 5156 if len(parts) == 1: 5157 this = exp.Literal.string(parts[0][0]) 5158 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5159 5160 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5161 unit = self.expression( 5162 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5163 ) 5164 5165 interval = self.expression(exp.Interval, this=this, unit=unit) 5166 5167 index = self._index 5168 self._match(TokenType.PLUS) 5169 5170 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5171 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5172 return self.expression( 5173 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5174 ) 5175 5176 self._retreat(index) 5177 return interval 5178 5179 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5180 this = self._parse_term() 5181 5182 while True: 5183 if self._match_set(self.BITWISE): 5184 this = self.expression( 5185 self.BITWISE[self._prev.token_type], 5186 this=this, 5187 expression=self._parse_term(), 5188 ) 5189 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5190 this = self.expression( 5191 exp.DPipe, 5192 this=this, 5193 expression=self._parse_term(), 5194 safe=not self.dialect.STRICT_STRING_CONCAT, 5195 ) 5196 elif self._match(TokenType.DQMARK): 5197 this = self.expression( 5198 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5199 ) 5200 elif self._match_pair(TokenType.LT, TokenType.LT): 5201 this = self.expression( 5202 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5203 ) 5204 elif self._match_pair(TokenType.GT, TokenType.GT): 5205 this = self.expression( 5206 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5207 ) 5208 else: 5209 break 5210 5211 return this 5212 5213 def _parse_term(self) -> t.Optional[exp.Expression]: 5214 this = self._parse_factor() 5215 5216 while self._match_set(self.TERM): 5217 klass = self.TERM[self._prev.token_type] 5218 comments = self._prev_comments 5219 expression = self._parse_factor() 5220 5221 this = self.expression(klass, this=this, comments=comments, expression=expression) 5222 5223 if isinstance(this, exp.Collate): 5224 expr = this.expression 5225 5226 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5227 # fall back to Identifier / Var 5228 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5229 ident = expr.this 5230 if isinstance(ident, exp.Identifier): 5231 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5232 5233 return this 5234 5235 def _parse_factor(self) -> t.Optional[exp.Expression]: 5236 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5237 this = parse_method() 5238 5239 while self._match_set(self.FACTOR): 5240 klass = self.FACTOR[self._prev.token_type] 5241 comments = self._prev_comments 5242 expression = parse_method() 5243 5244 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5245 self._retreat(self._index - 1) 5246 return this 5247 5248 this = self.expression(klass, this=this, comments=comments, expression=expression) 5249 5250 if isinstance(this, exp.Div): 5251 this.args["typed"] = self.dialect.TYPED_DIVISION 5252 this.args["safe"] = self.dialect.SAFE_DIVISION 5253 5254 return this 5255 5256 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5257 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5258 5259 def _parse_unary(self) -> t.Optional[exp.Expression]: 5260 if self._match_set(self.UNARY_PARSERS): 5261 return self.UNARY_PARSERS[self._prev.token_type](self) 5262 return self._parse_at_time_zone(self._parse_type()) 5263 5264 def _parse_type( 5265 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5266 ) -> t.Optional[exp.Expression]: 5267 interval = parse_interval and self._parse_interval() 5268 if interval: 5269 return interval 5270 5271 index = self._index 5272 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5273
5274 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5275 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5276 if isinstance(data_type, exp.Cast): 5277 # This constructor can contain ops directly after it, for instance struct unnesting: 5278 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5279 return self._parse_column_ops(data_type) 5280 5281 if data_type: 5282 index2 = self._index 5283 this = self._parse_primary() 5284 5285 if isinstance(this, exp.Literal): 5286 literal = this.name 5287 this = self._parse_column_ops(this) 5288 5289 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5290 if parser: 5291 return parser(self, this, data_type) 5292 5293 if ( 5294 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5295 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5296 and TIME_ZONE_RE.search(literal) 5297 ): 5298 data_type = exp.DataType.build("TIMESTAMPTZ") 5299 5300 return self.expression(exp.Cast, this=this, to=data_type) 5301 5302 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5303 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5304 # 5305 # If the index difference here is greater than 1, that means the parser itself must have 5306 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5307 # 5308 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5309 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5310 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5311 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5312 # 5313 # In these cases, we don't really want to return the converted type, but instead retreat 5314 # and try to parse a Column or Identifier in the section below.
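# (Editor's illustrative sketch, not part of the original source: for the input DECIMAL(38, 0) the tokens DECIMAL ( 38 , 0 ) are all consumed by _parse_types, so index2 - index > 1 and the branch below keeps the parsed DataType; for a bare DECIMAL on a dialect whose TYPE_CONVERTERS mapping expands it, e.g. Snowflake, only the type keyword is consumed, index2 - index == 1, and we fall through to the retreat that re-parses the token as a Column or Identifier.)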
5315 if data_type.expressions and index2 - index > 1: 5316 self._retreat(index2) 5317 return self._parse_column_ops(data_type) 5318 5319 self._retreat(index) 5320 5321 if fallback_to_identifier: 5322 return self._parse_id_var() 5323 5324 this = self._parse_column() 5325 return this and self._parse_column_ops(this) 5326 5327 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5328 this = self._parse_type() 5329 if not this: 5330 return None 5331 5332 if isinstance(this, exp.Column) and not this.table: 5333 this = exp.var(this.name.upper()) 5334 5335 return self.expression( 5336 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5337 ) 5338 5339 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5340 type_name = identifier.name 5341 5342 while self._match(TokenType.DOT): 5343 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5344 5345 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5346 5347 def _parse_types( 5348 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5349 ) -> t.Optional[exp.Expression]: 5350 index = self._index 5351 5352 this: t.Optional[exp.Expression] = None 5353 prefix = self._match_text_seq("SYSUDTLIB", ".") 5354 5355 if self._match_set(self.TYPE_TOKENS): 5356 type_token = self._prev.token_type 5357 else: 5358 type_token = None 5359 identifier = allow_identifiers and self._parse_id_var( 5360 any_token=False, tokens=(TokenType.VAR,) 5361 ) 5362 if isinstance(identifier, exp.Identifier): 5363 try: 5364 tokens = self.dialect.tokenize(identifier.name) 5365 except TokenError: 5366 tokens = None 5367 5368 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5369 type_token = tokens[0].token_type 5370 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5371 this = self._parse_user_defined_type(identifier) 5372 else: 5373 self._retreat(self._index - 1) 5374 return None 5375 else: 5376 return None 5377 5378 if type_token == TokenType.PSEUDO_TYPE: 5379 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5380 5381 if type_token == TokenType.OBJECT_IDENTIFIER: 5382 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5383 5384 # https://materialize.com/docs/sql/types/map/ 5385 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5386 key_type = self._parse_types( 5387 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5388 ) 5389 if not self._match(TokenType.FARROW): 5390 self._retreat(index) 5391 return None 5392 5393 value_type = self._parse_types( 5394 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5395 ) 5396 if not self._match(TokenType.R_BRACKET): 5397 self._retreat(index) 5398 return None 5399 5400 return exp.DataType( 5401 this=exp.DataType.Type.MAP, 5402 expressions=[key_type, value_type], 5403 nested=True, 5404 prefix=prefix, 5405 ) 5406 5407 nested = type_token in self.NESTED_TYPE_TOKENS 5408 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5409 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5410 expressions = None 5411 maybe_func = False 5412 5413 if self._match(TokenType.L_PAREN): 5414 if is_struct: 5415 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5416 elif nested: 5417 expressions = self._parse_csv( 5418 lambda: self._parse_types( 5419 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5420 ) 5421 ) 5422 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5423 this = expressions[0] 5424 this.set("nullable", True) 5425 self._match_r_paren() 5426 return this 5427 elif type_token in self.ENUM_TYPE_TOKENS: 5428 expressions = self._parse_csv(self._parse_equality) 5429 elif is_aggregate: 5430 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5431 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5432 ) 5433 if not func_or_ident: 5434 return None 5435 expressions = [func_or_ident] 5436 if self._match(TokenType.COMMA): 5437 expressions.extend( 5438 self._parse_csv( 5439 lambda: self._parse_types( 5440 check_func=check_func, 5441 schema=schema, 5442 allow_identifiers=allow_identifiers, 5443 ) 5444 ) 5445 ) 5446 else: 5447 expressions = self._parse_csv(self._parse_type_size) 5448 5449 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5450 if type_token == TokenType.VECTOR and len(expressions) == 2: 5451 expressions = self._parse_vector_expressions(expressions) 5452 5453 if not self._match(TokenType.R_PAREN): 5454 self._retreat(index) 5455 return None 5456 5457 maybe_func = True 5458 5459 values: t.Optional[t.List[exp.Expression]] = None 5460 5461 if nested and self._match(TokenType.LT): 5462 if is_struct: 5463 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5464 else: 5465 expressions = self._parse_csv( 5466 lambda: self._parse_types( 5467 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5468 ) 5469 ) 5470 5471 if not self._match(TokenType.GT): 5472 self.raise_error("Expecting >") 5473 5474 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5475 values = self._parse_csv(self._parse_assignment) 5476 if not values and is_struct: 5477 values = None 5478 self._retreat(self._index - 1) 5479 else: 5480 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5481 5482 if type_token in self.TIMESTAMPS: 5483 if self._match_text_seq("WITH", "TIME", "ZONE"): 5484 maybe_func = False 5485 tz_type = ( 5486 exp.DataType.Type.TIMETZ 5487 if type_token in self.TIMES 5488 else exp.DataType.Type.TIMESTAMPTZ 5489 ) 5490 this = exp.DataType(this=tz_type, expressions=expressions) 5491 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5492 maybe_func = False 5493 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5494 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5495 maybe_func = False 5496 elif type_token == TokenType.INTERVAL: 5497 unit = self._parse_var(upper=True) 5498 if unit: 5499 if self._match_text_seq("TO"): 5500 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5501 5502 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5503 else: 5504 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5505 elif type_token == TokenType.VOID: 5506 this = exp.DataType(this=exp.DataType.Type.NULL) 5507 5508 if maybe_func and check_func: 5509 index2 = self._index 5510 peek = self._parse_string() 5511 5512 if not peek: 5513 self._retreat(index) 5514 return None 5515 5516 self._retreat(index2) 5517 5518 if not this: 5519 if self._match_text_seq("UNSIGNED"): 5520 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5521 if not unsigned_type_token: 5522 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5523 5524 type_token = unsigned_type_token or type_token 5525 5526 # NULLABLE without parentheses can be a column (Presto/Trino) 5527 if type_token == 
TokenType.NULLABLE and not expressions: 5528 self._retreat(index) 5529 return None 5530 5531 this = exp.DataType( 5532 this=exp.DataType.Type[type_token.value], 5533 expressions=expressions, 5534 nested=nested, 5535 prefix=prefix, 5536 ) 5537 5538 # Empty arrays/structs are allowed 5539 if values is not None: 5540 cls = exp.Struct if is_struct else exp.Array 5541 this = exp.cast(cls(expressions=values), this, copy=False) 5542 5543 elif expressions: 5544 this.set("expressions", expressions) 5545 5546 # https://materialize.com/docs/sql/types/list/#type-name 5547 while self._match(TokenType.LIST): 5548 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5549 5550 index = self._index 5551 5552 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5553 matched_array = self._match(TokenType.ARRAY) 5554 5555 while self._curr: 5556 datatype_token = self._prev.token_type 5557 matched_l_bracket = self._match(TokenType.L_BRACKET) 5558 5559 if (not matched_l_bracket and not matched_array) or ( 5560 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5561 ): 5562 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5563 # not to be confused with the fixed size array parsing 5564 break 5565 5566 matched_array = False 5567 values = self._parse_csv(self._parse_assignment) or None 5568 if ( 5569 values 5570 and not schema 5571 and ( 5572 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5573 ) 5574 ): 5575 # Retreating here means that we should not parse the following values as part of the data type; e.g. in DuckDB, 5576 # ARRAY[1] should instead be parsed into an exp.Array, in contrast to INT[x][y], which denotes a fixed-size array data type 5577 self._retreat(index) 5578 break 5579 5580 this = exp.DataType( 5581 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5582 ) 5583 self._match(TokenType.R_BRACKET) 5584 5585 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5586 converter = self.TYPE_CONVERTERS.get(this.this) 5587 if converter: 5588 this = converter(t.cast(exp.DataType, this)) 5589 5590 return this 5591 5592 def _parse_vector_expressions( 5593 self, expressions: t.List[exp.Expression] 5594 ) -> t.List[exp.Expression]: 5595 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5596 5597 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5598 index = self._index 5599 5600 if ( 5601 self._curr 5602 and self._next 5603 and self._curr.token_type in self.TYPE_TOKENS 5604 and self._next.token_type in self.TYPE_TOKENS 5605 ): 5606 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5607 # type token.
Without this, the list will be parsed as a type and we'll eventually crash 5608 this = self._parse_id_var() 5609 else: 5610 this = ( 5611 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5612 or self._parse_id_var() 5613 ) 5614 5615 self._match(TokenType.COLON) 5616 5617 if ( 5618 type_required 5619 and not isinstance(this, exp.DataType) 5620 and not self._match_set(self.TYPE_TOKENS, advance=False) 5621 ): 5622 self._retreat(index) 5623 return self._parse_types() 5624 5625 return self._parse_column_def(this) 5626 5627 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5628 if not self._match_text_seq("AT", "TIME", "ZONE"): 5629 return this 5630 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5631 5632 def _parse_column(self) -> t.Optional[exp.Expression]: 5633 this = self._parse_column_reference() 5634 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5635 5636 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5637 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5638 5639 return column 5640 5641 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5642 this = self._parse_field() 5643 if ( 5644 not this 5645 and self._match(TokenType.VALUES, advance=False) 5646 and self.VALUES_FOLLOWED_BY_PAREN 5647 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5648 ): 5649 this = self._parse_id_var() 5650 5651 if isinstance(this, exp.Identifier): 5652 # We bubble up comments from the Identifier to the Column 5653 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5654 5655 return this 5656 5657 def _parse_colon_as_variant_extract( 5658 self, this: t.Optional[exp.Expression] 5659 ) -> t.Optional[exp.Expression]: 5660 casts = [] 5661 json_path = [] 5662 escape = None 5663 5664 while self._match(TokenType.COLON): 5665 start_index = self._index 5666 5667 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5668 path = self._parse_column_ops( 5669 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5670 ) 5671 5672 # The cast :: operator has a lower precedence than the extraction operator :, so 5673 # we rearrange the AST appropriately to avoid casting the JSON path 5674 while isinstance(path, exp.Cast): 5675 casts.append(path.to) 5676 path = path.this 5677 5678 if casts: 5679 dcolon_offset = next( 5680 i 5681 for i, t in enumerate(self._tokens[start_index:]) 5682 if t.token_type == TokenType.DCOLON 5683 ) 5684 end_token = self._tokens[start_index + dcolon_offset - 1] 5685 else: 5686 end_token = self._prev 5687 5688 if path: 5689 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5690 # it'll roundtrip to a string literal in GET_PATH 5691 if isinstance(path, exp.Identifier) and path.quoted: 5692 escape = True 5693 5694 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5695 5696 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5697 # Databricks transforms it back to the colon/dot notation 5698 if json_path: 5699 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5700 5701 if json_path_expr: 5702 json_path_expr.set("escape", escape) 5703 5704 this = self.expression( 5705 exp.JSONExtract, 5706 this=this, 5707 expression=json_path_expr, 5708 variant_extract=True, 5709 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5710 ) 5711 5712 while casts: 5713 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5714 5715 return this 5716 5717 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5718 return self._parse_types() 5719 5720 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5721 this = self._parse_bracket(this) 5722 5723 while self._match_set(self.COLUMN_OPERATORS): 5724 op_token = self._prev.token_type 5725 op = self.COLUMN_OPERATORS.get(op_token) 5726 5727 if op_token in self.CAST_COLUMN_OPERATORS: 5728 field = self._parse_dcolon() 5729 if not field: 5730 self.raise_error("Expected type") 5731 elif op and self._curr: 5732 field = self._parse_column_reference() or self._parse_bitwise() 5733 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5734 field = self._parse_column_ops(field) 5735 else: 5736 field = self._parse_field(any_token=True, anonymous_func=True) 5737 5738 # Function calls can be qualified, e.g., x.y.FOO() 5739 # This converts the final AST to a series of Dots leading to the function call 5740 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5741 if isinstance(field, (exp.Func, exp.Window)) and this: 5742 this = this.transform( 5743 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5744 ) 5745 5746 if op: 5747 this = op(self, this, field) 5748 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5749 this = self.expression( 5750 exp.Column, 5751 comments=this.comments, 5752 this=field, 5753 table=this.this, 5754 db=this.args.get("table"), 5755 catalog=this.args.get("db"), 5756 ) 5757 elif isinstance(field, exp.Window): 5758 # Move the exp.Dot's to the window's function 5759 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5760 field.set("this", window_func) 5761 this = field 5762 else: 5763 this = self.expression(exp.Dot, this=this, expression=field) 5764 5765 if field and field.comments: 5766 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5767 5768 this = self._parse_bracket(this) 5769 5770 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5771 5772 def _parse_paren(self) -> t.Optional[exp.Expression]: 5773 if not self._match(TokenType.L_PAREN): 5774 return None 5775 5776 comments = self._prev_comments 5777 query = self._parse_select() 5778 5779 if query: 5780 expressions = [query] 5781 else: 5782 expressions = self._parse_expressions() 5783 5784 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5785 5786 if not this and self._match(TokenType.R_PAREN, advance=False): 5787 this = self.expression(exp.Tuple) 5788 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5789 this = self._parse_subquery(this=this, parse_alias=False) 5790 elif isinstance(this, exp.Subquery): 5791 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5792 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5793 this = self.expression(exp.Tuple, expressions=expressions) 5794 else: 5795 this = self.expression(exp.Paren, this=this) 5796 5797 if this: 5798 this.add_comments(comments) 5799 5800 self._match_r_paren(expression=this) 5801 5802 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5803 return self._parse_window(this) 5804 5805 return this 5806 5807 def _parse_primary(self) -> t.Optional[exp.Expression]: 5808 if self._match_set(self.PRIMARY_PARSERS): 5809 token_type = self._prev.token_type 5810 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5811 5812 if token_type == TokenType.STRING: 5813 expressions = [primary] 5814 while self._match(TokenType.STRING): 5815 expressions.append(exp.Literal.string(self._prev.text)) 5816 5817 if len(expressions) > 1: 5818 return self.expression(exp.Concat, expressions=expressions) 5819 5820 return primary 5821 5822 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5823 return exp.Literal.number(f"0.{self._prev.text}") 5824 5825 return self._parse_paren() 5826 5827 def _parse_field( 5828 self, 5829 any_token: bool = False, 5830 tokens: t.Optional[t.Collection[TokenType]] = None, 5831 anonymous_func: bool = False, 5832 ) -> t.Optional[exp.Expression]: 5833 if anonymous_func: 5834 field = ( 5835 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5836 or self._parse_primary() 5837 ) 5838 else: 5839 field = self._parse_primary() or self._parse_function( 5840 anonymous=anonymous_func, any_token=any_token 5841 ) 5842 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5843 5844 def _parse_function( 5845 self, 5846 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5847 anonymous: bool = False, 5848 optional_parens: bool = True, 5849 any_token: bool = False, 5850 ) -> t.Optional[exp.Expression]: 5851 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5852 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5853 fn_syntax = False 5854 if ( 5855 self._match(TokenType.L_BRACE, advance=False) 5856 and self._next 5857 and self._next.text.upper() == "FN" 5858 ): 5859 self._advance(2) 5860 fn_syntax = True 5861 5862 func = self._parse_function_call( 5863 functions=functions, 5864 anonymous=anonymous, 5865 optional_parens=optional_parens, 5866 any_token=any_token, 5867 ) 5868 5869 if fn_syntax: 5870 self._match(TokenType.R_BRACE) 5871 5872 return func 5873 5874 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5875 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5876 5877 def _parse_function_call( 5878 self, 5879 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5880 anonymous: bool = False, 5881 optional_parens: bool = True, 5882 any_token: bool = False, 5883 ) -> t.Optional[exp.Expression]: 5884 if not self._curr: 5885 return None 5886 5887 comments = self._curr.comments 5888 prev = self._prev 5889 token = self._curr 5890 token_type = self._curr.token_type 5891 this = self._curr.text 5892 upper = this.upper() 5893 5894 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5895 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5896 self._advance() 5897 return 
self._parse_window(parser(self)) 5898 5899 if not self._next or self._next.token_type != TokenType.L_PAREN: 5900 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5901 self._advance() 5902 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5903 5904 return None 5905 5906 if any_token: 5907 if token_type in self.RESERVED_TOKENS: 5908 return None 5909 elif token_type not in self.FUNC_TOKENS: 5910 return None 5911 5912 self._advance(2) 5913 5914 parser = self.FUNCTION_PARSERS.get(upper) 5915 if parser and not anonymous: 5916 this = parser(self) 5917 else: 5918 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5919 5920 if subquery_predicate: 5921 expr = None 5922 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5923 expr = self._parse_select() 5924 self._match_r_paren() 5925 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5926 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5927 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5928 self._advance(-1) 5929 expr = self._parse_bitwise() 5930 5931 if expr: 5932 return self.expression(subquery_predicate, comments=comments, this=expr) 5933 5934 if functions is None: 5935 functions = self.FUNCTIONS 5936 5937 function = functions.get(upper) 5938 known_function = function and not anonymous 5939 5940 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5941 args = self._parse_function_args(alias) 5942 5943 post_func_comments = self._curr and self._curr.comments 5944 if known_function and post_func_comments: 5945 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5946 # call we'll construct it as exp.Anonymous, even if it's "known" 5947 if any( 5948 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5949 for comment in post_func_comments 5950 ): 5951 known_function = False 5952 5953 if alias and known_function: 5954 args = self._kv_to_prop_eq(args) 5955 5956 if known_function: 5957 func_builder = t.cast(t.Callable, function) 5958 5959 if "dialect" in func_builder.__code__.co_varnames: 5960 func = func_builder(args, dialect=self.dialect) 5961 else: 5962 func = func_builder(args) 5963 5964 func = self.validate_expression(func, args) 5965 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5966 func.meta["name"] = this 5967 5968 this = func 5969 else: 5970 if token_type == TokenType.IDENTIFIER: 5971 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5972 5973 this = self.expression(exp.Anonymous, this=this, expressions=args) 5974 this = this.update_positions(token) 5975 5976 if isinstance(this, exp.Expression): 5977 this.add_comments(comments) 5978 5979 self._match_r_paren(this) 5980 return self._parse_window(this) 5981 5982 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5983 return expression 5984 5985 def _kv_to_prop_eq( 5986 self, expressions: t.List[exp.Expression], parse_map: bool = False 5987 ) -> t.List[exp.Expression]: 5988 transformed = [] 5989 5990 for index, e in enumerate(expressions): 5991 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5992 if isinstance(e, exp.Alias): 5993 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5994 5995 if not isinstance(e, exp.PropertyEQ): 5996 e = self.expression( 5997 exp.PropertyEQ, 5998 this=e.this if parse_map else exp.to_identifier(e.this.name), 5999 expression=e.expression, 6000 ) 6001 6002 if isinstance(e.this, exp.Column): 6003 e.this.replace(e.this.this) 
6004 else: 6005 e = self._to_prop_eq(e, index) 6006 6007 transformed.append(e) 6008 6009 return transformed 6010 6011 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6012 return self._parse_statement() 6013 6014 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6015 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6016 6017 def _parse_user_defined_function( 6018 self, kind: t.Optional[TokenType] = None 6019 ) -> t.Optional[exp.Expression]: 6020 this = self._parse_table_parts(schema=True) 6021 6022 if not self._match(TokenType.L_PAREN): 6023 return this 6024 6025 expressions = self._parse_csv(self._parse_function_parameter) 6026 self._match_r_paren() 6027 return self.expression( 6028 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6029 ) 6030 6031 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6032 literal = self._parse_primary() 6033 if literal: 6034 return self.expression(exp.Introducer, this=token.text, expression=literal) 6035 6036 return self._identifier_expression(token) 6037 6038 def _parse_session_parameter(self) -> exp.SessionParameter: 6039 kind = None 6040 this = self._parse_id_var() or self._parse_primary() 6041 6042 if this and self._match(TokenType.DOT): 6043 kind = this.name 6044 this = self._parse_var() or self._parse_primary() 6045 6046 return self.expression(exp.SessionParameter, this=this, kind=kind) 6047 6048 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6049 return self._parse_id_var() 6050 6051 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6052 index = self._index 6053 6054 if self._match(TokenType.L_PAREN): 6055 expressions = t.cast( 6056 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6057 ) 6058 6059 if not self._match(TokenType.R_PAREN): 6060 self._retreat(index) 6061 else: 6062 expressions = [self._parse_lambda_arg()] 6063 6064 if self._match_set(self.LAMBDAS): 6065 return self.LAMBDAS[self._prev.token_type](self, expressions) 6066 6067 self._retreat(index) 6068 6069 this: t.Optional[exp.Expression] 6070 6071 if self._match(TokenType.DISTINCT): 6072 this = self.expression( 6073 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6074 ) 6075 else: 6076 this = self._parse_select_or_expression(alias=alias) 6077 6078 return self._parse_limit( 6079 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6080 ) 6081 6082 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6083 index = self._index 6084 if not self._match(TokenType.L_PAREN): 6085 return this 6086 6087 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6088 # expr can be of both types 6089 if self._match_set(self.SELECT_START_TOKENS): 6090 self._retreat(index) 6091 return this 6092 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6093 self._match_r_paren() 6094 return self.expression(exp.Schema, this=this, expressions=args) 6095 6096 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6097 return self._parse_column_def(self._parse_field(any_token=True)) 6098 6099 def _parse_column_def( 6100 self, this: t.Optional[exp.Expression], computed_column: bool = True 6101 ) -> t.Optional[exp.Expression]: 6102 # column defs are not really columns, they're identifiers 6103 if isinstance(this, exp.Column): 6104 this = this.this 6105 6106 if not computed_column: 6107 self._match(TokenType.ALIAS) 6108 6109 kind = self._parse_types(schema=True) 6110 6111 if self._match_text_seq("FOR", "ORDINALITY"): 6112 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6113 6114 constraints: t.List[exp.Expression] = [] 6115 6116 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6117 ("ALIAS", "MATERIALIZED") 6118 ): 6119 persisted = self._prev.text.upper() == "MATERIALIZED" 6120 constraint_kind = exp.ComputedColumnConstraint( 6121 this=self._parse_assignment(), 6122 persisted=persisted or self._match_text_seq("PERSISTED"), 6123 data_type=exp.Var(this="AUTO") 6124 if self._match_text_seq("AUTO") 6125 else self._parse_types(), 6126 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6127 ) 6128 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6129 elif ( 6130 kind 6131 and self._match(TokenType.ALIAS, advance=False) 6132 and ( 6133 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6134 or (self._next and self._next.token_type == TokenType.L_PAREN) 6135 ) 6136 ): 6137 self._advance() 6138 constraints.append( 6139 self.expression( 6140 exp.ColumnConstraint, 6141 kind=exp.ComputedColumnConstraint( 6142 this=self._parse_disjunction(), 6143 persisted=self._match_texts(("STORED", "VIRTUAL")) 6144 and self._prev.text.upper() == "STORED", 6145 ), 6146 ) 6147 ) 6148 6149 while True: 6150 constraint = self._parse_column_constraint() 6151 if not constraint: 6152 break 6153 constraints.append(constraint) 6154 6155 if not kind and not constraints: 6156 return this 6157 6158 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6159 6160 def _parse_auto_increment( 6161 self, 6162 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6163 start = None 6164 increment = None 6165 order = None 6166 6167 if self._match(TokenType.L_PAREN, advance=False): 6168 args = self._parse_wrapped_csv(self._parse_bitwise) 6169 start = seq_get(args, 0) 6170 increment = seq_get(args, 1) 6171 elif self._match_text_seq("START"): 6172 start = self._parse_bitwise() 6173 self._match_text_seq("INCREMENT") 6174 increment = self._parse_bitwise() 6175 if self._match_text_seq("ORDER"): 6176 order = True 6177 elif self._match_text_seq("NOORDER"): 6178 order = False 6179 6180 if start and increment: 6181 return exp.GeneratedAsIdentityColumnConstraint( 6182 start=start, increment=increment, this=False, order=order 6183 ) 6184 6185 return exp.AutoIncrementColumnConstraint() 6186 6187 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6188 if not self._match_text_seq("REFRESH"): 6189 self._retreat(self._index - 1) 6190 return None 6191 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6192 6193 def _parse_compress(self) -> exp.CompressColumnConstraint: 6194 if self._match(TokenType.L_PAREN, advance=False): 6195 return self.expression( 6196 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6197 ) 6198 6199 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6200 6201 def _parse_generated_as_identity( 6202 self, 6203 ) -> ( 6204 exp.GeneratedAsIdentityColumnConstraint 6205 | exp.ComputedColumnConstraint 6206 | exp.GeneratedAsRowColumnConstraint 6207 ): 6208 if self._match_text_seq("BY", "DEFAULT"): 6209 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6210 this = self.expression( 6211 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6212 ) 6213 else: 6214 self._match_text_seq("ALWAYS") 6215 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6216 6217 self._match(TokenType.ALIAS) 6218 6219 if self._match_text_seq("ROW"): 6220 start = self._match_text_seq("START") 6221 if not start: 6222 self._match(TokenType.END) 6223 hidden = self._match_text_seq("HIDDEN") 6224 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6225 6226 identity = self._match_text_seq("IDENTITY") 6227 6228 if self._match(TokenType.L_PAREN): 6229 if self._match(TokenType.START_WITH): 6230 this.set("start", self._parse_bitwise()) 6231 if self._match_text_seq("INCREMENT", "BY"): 6232 this.set("increment", self._parse_bitwise()) 6233 if self._match_text_seq("MINVALUE"): 6234 this.set("minvalue", self._parse_bitwise()) 6235 if self._match_text_seq("MAXVALUE"): 6236 this.set("maxvalue", self._parse_bitwise()) 6237 6238 if self._match_text_seq("CYCLE"): 6239 this.set("cycle", True) 6240 elif self._match_text_seq("NO", "CYCLE"): 6241 this.set("cycle", False) 6242 6243 if not identity: 6244 this.set("expression", self._parse_range()) 6245 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6246 args = self._parse_csv(self._parse_bitwise) 6247 this.set("start", seq_get(args, 0)) 6248 this.set("increment", seq_get(args, 1)) 6249 6250 self._match_r_paren() 6251 6252 return this 6253 6254 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6255 self._match_text_seq("LENGTH") 6256 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6257 6258 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6259 if self._match_text_seq("NULL"): 6260 return self.expression(exp.NotNullColumnConstraint) 6261 if self._match_text_seq("CASESPECIFIC"): 6262 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6263 if self._match_text_seq("FOR", "REPLICATION"): 6264 return self.expression(exp.NotForReplicationColumnConstraint) 6265 6266 # Unconsume the `NOT` token 6267 self._retreat(self._index - 1) 6268 return None 6269 6270 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6271 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6272 6273 procedure_option_follows = ( 6274 self._match(TokenType.WITH, advance=False) 6275 and self._next 6276 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6277 ) 6278 6279 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6280 return self.expression( 6281 exp.ColumnConstraint, 6282 this=this, 6283 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6284 ) 6285 6286 return this 6287 6288 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6289 if not 
self._match(TokenType.CONSTRAINT): 6290 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6291 6292 return self.expression( 6293 exp.Constraint, 6294 this=self._parse_id_var(), 6295 expressions=self._parse_unnamed_constraints(), 6296 ) 6297 6298 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6299 constraints = [] 6300 while True: 6301 constraint = self._parse_unnamed_constraint() or self._parse_function() 6302 if not constraint: 6303 break 6304 constraints.append(constraint) 6305 6306 return constraints 6307 6308 def _parse_unnamed_constraint( 6309 self, constraints: t.Optional[t.Collection[str]] = None 6310 ) -> t.Optional[exp.Expression]: 6311 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6312 constraints or self.CONSTRAINT_PARSERS 6313 ): 6314 return None 6315 6316 constraint = self._prev.text.upper() 6317 if constraint not in self.CONSTRAINT_PARSERS: 6318 self.raise_error(f"No parser found for schema constraint {constraint}.") 6319 6320 return self.CONSTRAINT_PARSERS[constraint](self) 6321 6322 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6323 return self._parse_id_var(any_token=False) 6324 6325 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6326 self._match_texts(("KEY", "INDEX")) 6327 return self.expression( 6328 exp.UniqueColumnConstraint, 6329 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6330 this=self._parse_schema(self._parse_unique_key()), 6331 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6332 on_conflict=self._parse_on_conflict(), 6333 options=self._parse_key_constraint_options(), 6334 ) 6335 6336 def _parse_key_constraint_options(self) -> t.List[str]: 6337 options = [] 6338 while True: 6339 if not self._curr: 6340 break 6341 6342 if self._match(TokenType.ON): 6343 action = None 6344 on = self._advance_any() and self._prev.text 6345 6346 if self._match_text_seq("NO", "ACTION"): 6347 action = "NO ACTION" 6348 elif self._match_text_seq("CASCADE"): 6349 action = "CASCADE" 6350 elif self._match_text_seq("RESTRICT"): 6351 action = "RESTRICT" 6352 elif self._match_pair(TokenType.SET, TokenType.NULL): 6353 action = "SET NULL" 6354 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6355 action = "SET DEFAULT" 6356 else: 6357 self.raise_error("Invalid key constraint") 6358 6359 options.append(f"ON {on} {action}") 6360 else: 6361 var = self._parse_var_from_options( 6362 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6363 ) 6364 if not var: 6365 break 6366 options.append(var.name) 6367 6368 return options 6369 6370 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6371 if match and not self._match(TokenType.REFERENCES): 6372 return None 6373 6374 expressions = None 6375 this = self._parse_table(schema=True) 6376 options = self._parse_key_constraint_options() 6377 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6378 6379 def _parse_foreign_key(self) -> exp.ForeignKey: 6380 expressions = ( 6381 self._parse_wrapped_id_vars() 6382 if not self._match(TokenType.REFERENCES, advance=False) 6383 else None 6384 ) 6385 reference = self._parse_references() 6386 on_options = {} 6387 6388 while self._match(TokenType.ON): 6389 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6390 self.raise_error("Expected DELETE or UPDATE") 6391 6392 kind = self._prev.text.lower() 6393 6394 if self._match_text_seq("NO", "ACTION"): 6395 action = "NO ACTION" 6396 elif 
self._match(TokenType.SET): 6397 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6398 action = "SET " + self._prev.text.upper() 6399 else: 6400 self._advance() 6401 action = self._prev.text.upper() 6402 6403 on_options[kind] = action 6404 6405 return self.expression( 6406 exp.ForeignKey, 6407 expressions=expressions, 6408 reference=reference, 6409 options=self._parse_key_constraint_options(), 6410 **on_options, # type: ignore 6411 ) 6412 6413 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6414 return self._parse_ordered() or self._parse_field() 6415 6416 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6417 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6418 self._retreat(self._index - 1) 6419 return None 6420 6421 id_vars = self._parse_wrapped_id_vars() 6422 return self.expression( 6423 exp.PeriodForSystemTimeConstraint, 6424 this=seq_get(id_vars, 0), 6425 expression=seq_get(id_vars, 1), 6426 ) 6427 6428 def _parse_primary_key( 6429 self, wrapped_optional: bool = False, in_props: bool = False 6430 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6431 desc = ( 6432 self._match_set((TokenType.ASC, TokenType.DESC)) 6433 and self._prev.token_type == TokenType.DESC 6434 ) 6435 6436 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6437 return self.expression( 6438 exp.PrimaryKeyColumnConstraint, 6439 desc=desc, 6440 options=self._parse_key_constraint_options(), 6441 ) 6442 6443 expressions = self._parse_wrapped_csv( 6444 self._parse_primary_key_part, optional=wrapped_optional 6445 ) 6446 6447 return self.expression( 6448 exp.PrimaryKey, 6449 expressions=expressions, 6450 include=self._parse_index_params(), 6451 options=self._parse_key_constraint_options(), 6452 ) 6453 6454 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6455 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6456 6457 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6458 """ 6459 Parses a datetime literal in ODBC format. We parse the literal into the corresponding 6460 expression type; for example, `{d'yyyy-mm-dd'}` will be parsed as a `Date` expression, exactly the 6461 same as we would for `DATE('yyyy-mm-dd')`.
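Illustrative example (editor's addition; assumes the default ODBC_DATETIME_LITERALS mapping, in which "d", "t" and "ts" build Date, Time and Timestamp respectively): `{d '2000-01-01'}` would be parsed into the same AST as `exp.Date(this=exp.Literal.string('2000-01-01'))`.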
6462 6463 Reference: 6464 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6465 """ 6466 self._match(TokenType.VAR) 6467 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6468 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6469 if not self._match(TokenType.R_BRACE): 6470 self.raise_error("Expected }") 6471 return expression 6472 6473 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6474 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6475 return this 6476 6477 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6478 map_token = seq_get(self._tokens, self._index - 2) 6479 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6480 else: 6481 parse_map = False 6482 6483 bracket_kind = self._prev.token_type 6484 if ( 6485 bracket_kind == TokenType.L_BRACE 6486 and self._curr 6487 and self._curr.token_type == TokenType.VAR 6488 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6489 ): 6490 return self._parse_odbc_datetime_literal() 6491 6492 expressions = self._parse_csv( 6493 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6494 ) 6495 6496 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6497 self.raise_error("Expected ]") 6498 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6499 self.raise_error("Expected }") 6500 6501 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6502 if bracket_kind == TokenType.L_BRACE: 6503 this = self.expression( 6504 exp.Struct, 6505 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6506 ) 6507 elif not this: 6508 this = build_array_constructor( 6509 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6510 ) 6511 else: 6512 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6513 if constructor_type: 6514 return build_array_constructor( 6515 constructor_type, 6516 args=expressions, 6517 bracket_kind=bracket_kind, 6518 dialect=self.dialect, 6519 ) 6520 6521 expressions = apply_index_offset( 6522 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6523 ) 6524 this = self.expression( 6525 exp.Bracket, 6526 this=this, 6527 expressions=expressions, 6528 comments=this.pop_comments(), 6529 ) 6530 6531 self._add_comments(this) 6532 return self._parse_bracket(this) 6533 6534 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6535 if self._match(TokenType.COLON): 6536 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6537 return this 6538 6539 def _parse_case(self) -> t.Optional[exp.Expression]: 6540 if self._match(TokenType.DOT, advance=False): 6541 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6542 self._retreat(self._index - 1) 6543 return None 6544 6545 ifs = [] 6546 default = None 6547 6548 comments = self._prev_comments 6549 expression = self._parse_assignment() 6550 6551 while self._match(TokenType.WHEN): 6552 this = self._parse_assignment() 6553 self._match(TokenType.THEN) 6554 then = self._parse_assignment() 6555 ifs.append(self.expression(exp.If, this=this, true=then)) 6556 6557 if self._match(TokenType.ELSE): 6558 default = self._parse_assignment() 6559 6560 if not self._match(TokenType.END): 6561 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6562 default 
= exp.column("interval") 6563 else: 6564 self.raise_error("Expected END after CASE", self._prev) 6565 6566 return self.expression( 6567 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6568 ) 6569 6570 def _parse_if(self) -> t.Optional[exp.Expression]: 6571 if self._match(TokenType.L_PAREN): 6572 args = self._parse_csv( 6573 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6574 ) 6575 this = self.validate_expression(exp.If.from_arg_list(args), args) 6576 self._match_r_paren() 6577 else: 6578 index = self._index - 1 6579 6580 if self.NO_PAREN_IF_COMMANDS and index == 0: 6581 return self._parse_as_command(self._prev) 6582 6583 condition = self._parse_assignment() 6584 6585 if not condition: 6586 self._retreat(index) 6587 return None 6588 6589 self._match(TokenType.THEN) 6590 true = self._parse_assignment() 6591 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6592 self._match(TokenType.END) 6593 this = self.expression(exp.If, this=condition, true=true, false=false) 6594 6595 return this 6596 6597 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6598 if not self._match_text_seq("VALUE", "FOR"): 6599 self._retreat(self._index - 1) 6600 return None 6601 6602 return self.expression( 6603 exp.NextValueFor, 6604 this=self._parse_column(), 6605 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6606 ) 6607 6608 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6609 this = self._parse_function() or self._parse_var_or_string(upper=True) 6610 6611 if self._match(TokenType.FROM): 6612 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6613 6614 if not self._match(TokenType.COMMA): 6615 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6616 6617 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6618 6619 def _parse_gap_fill(self) -> exp.GapFill: 6620 self._match(TokenType.TABLE) 6621 this = self._parse_table() 6622 6623 self._match(TokenType.COMMA) 6624 args = [this, *self._parse_csv(self._parse_lambda)] 6625 6626 gap_fill = exp.GapFill.from_arg_list(args) 6627 return self.validate_expression(gap_fill, args) 6628 6629 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6630 this = self._parse_assignment() 6631 6632 if not self._match(TokenType.ALIAS): 6633 if self._match(TokenType.COMMA): 6634 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6635 6636 self.raise_error("Expected AS after CAST") 6637 6638 fmt = None 6639 to = self._parse_types() 6640 6641 default = self._match(TokenType.DEFAULT) 6642 if default: 6643 default = self._parse_bitwise() 6644 self._match_text_seq("ON", "CONVERSION", "ERROR") 6645 6646 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6647 fmt_string = self._parse_string() 6648 fmt = self._parse_at_time_zone(fmt_string) 6649 6650 if not to: 6651 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6652 if to.this in exp.DataType.TEMPORAL_TYPES: 6653 this = self.expression( 6654 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6655 this=this, 6656 format=exp.Literal.string( 6657 format_time( 6658 fmt_string.this if fmt_string else "", 6659 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6660 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6661 ) 6662 ), 6663 safe=safe, 6664 ) 6665 6666 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6667 this.set("zone", 
fmt.args["zone"]) 6668 return this 6669 elif not to: 6670 self.raise_error("Expected TYPE after CAST") 6671 elif isinstance(to, exp.Identifier): 6672 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6673 elif to.this == exp.DataType.Type.CHAR: 6674 if self._match(TokenType.CHARACTER_SET): 6675 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6676 6677 return self.build_cast( 6678 strict=strict, 6679 this=this, 6680 to=to, 6681 format=fmt, 6682 safe=safe, 6683 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6684 default=default, 6685 ) 6686 6687 def _parse_string_agg(self) -> exp.GroupConcat: 6688 if self._match(TokenType.DISTINCT): 6689 args: t.List[t.Optional[exp.Expression]] = [ 6690 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6691 ] 6692 if self._match(TokenType.COMMA): 6693 args.extend(self._parse_csv(self._parse_assignment)) 6694 else: 6695 args = self._parse_csv(self._parse_assignment) # type: ignore 6696 6697 if self._match_text_seq("ON", "OVERFLOW"): 6698 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6699 if self._match_text_seq("ERROR"): 6700 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6701 else: 6702 self._match_text_seq("TRUNCATE") 6703 on_overflow = self.expression( 6704 exp.OverflowTruncateBehavior, 6705 this=self._parse_string(), 6706 with_count=( 6707 self._match_text_seq("WITH", "COUNT") 6708 or not self._match_text_seq("WITHOUT", "COUNT") 6709 ), 6710 ) 6711 else: 6712 on_overflow = None 6713 6714 index = self._index 6715 if not self._match(TokenType.R_PAREN) and args: 6716 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6717 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6718 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6719 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6720 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6721 6722 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6723 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6724 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
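# Editor's illustrative note, not in the original source: Postgres' STRING_AGG(x, ',' ORDER BY y) is fully handled by the branch above, whereas Trino's LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) reaches this point; folding the ORDER BY into `this` yields the same shape as MySQL's GROUP_CONCAT(x ORDER BY y SEPARATOR ','), which keeps transpilation between these dialects straightforward.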
6725 if not self._match_text_seq("WITHIN", "GROUP"): 6726 self._retreat(index) 6727 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6728 6729 # The corresponding match_r_paren will be called in parse_function (caller) 6730 self._match_l_paren() 6731 6732 return self.expression( 6733 exp.GroupConcat, 6734 this=self._parse_order(this=seq_get(args, 0)), 6735 separator=seq_get(args, 1), 6736 on_overflow=on_overflow, 6737 ) 6738 6739 def _parse_convert( 6740 self, strict: bool, safe: t.Optional[bool] = None 6741 ) -> t.Optional[exp.Expression]: 6742 this = self._parse_bitwise() 6743 6744 if self._match(TokenType.USING): 6745 to: t.Optional[exp.Expression] = self.expression( 6746 exp.CharacterSet, this=self._parse_var() 6747 ) 6748 elif self._match(TokenType.COMMA): 6749 to = self._parse_types() 6750 else: 6751 to = None 6752 6753 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6754 6755 def _parse_xml_table(self) -> exp.XMLTable: 6756 namespaces = None 6757 passing = None 6758 columns = None 6759 6760 if self._match_text_seq("XMLNAMESPACES", "("): 6761 namespaces = self._parse_xml_namespace() 6762 self._match_text_seq(")", ",") 6763 6764 this = self._parse_string() 6765 6766 if self._match_text_seq("PASSING"): 6767 # The BY VALUE keywords are optional and are provided for semantic clarity 6768 self._match_text_seq("BY", "VALUE") 6769 passing = self._parse_csv(self._parse_column) 6770 6771 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6772 6773 if self._match_text_seq("COLUMNS"): 6774 columns = self._parse_csv(self._parse_field_def) 6775 6776 return self.expression( 6777 exp.XMLTable, 6778 this=this, 6779 namespaces=namespaces, 6780 passing=passing, 6781 columns=columns, 6782 by_ref=by_ref, 6783 ) 6784 6785 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6786 namespaces = [] 6787 6788 while True: 6789 if self._match(TokenType.DEFAULT): 6790 uri = self._parse_string() 6791 else: 6792 uri = self._parse_alias(self._parse_string()) 6793 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6794 if not self._match(TokenType.COMMA): 6795 break 6796 6797 return namespaces 6798 6799 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6800 args = self._parse_csv(self._parse_assignment) 6801 6802 if len(args) < 3: 6803 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6804 6805 return self.expression(exp.DecodeCase, expressions=args) 6806 6807 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6808 self._match_text_seq("KEY") 6809 key = self._parse_column() 6810 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6811 self._match_text_seq("VALUE") 6812 value = self._parse_bitwise() 6813 6814 if not key and not value: 6815 return None 6816 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6817 6818 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6819 if not this or not self._match_text_seq("FORMAT", "JSON"): 6820 return this 6821 6822 return self.expression(exp.FormatJson, this=this) 6823 6824 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6825 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6826 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6827 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6828 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6829 else: 6830 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6831 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6832 6833 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6834 6835 if not empty and not error and not null: 6836 return None 6837 6838 return self.expression( 6839 exp.OnCondition, 6840 empty=empty, 6841 error=error, 6842 null=null, 6843 ) 6844 6845 def _parse_on_handling( 6846 self, on: str, *values: str 6847 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6848 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6849 for value in values: 6850 if self._match_text_seq(value, "ON", on): 6851 return f"{value} ON {on}" 6852 6853 index = self._index 6854 if self._match(TokenType.DEFAULT): 6855 default_value = self._parse_bitwise() 6856 if self._match_text_seq("ON", on): 6857 return default_value 6858 6859 self._retreat(index) 6860 6861 return None 6862 6863 @t.overload 6864 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6865 6866 @t.overload 6867 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6868 6869 def _parse_json_object(self, agg=False): 6870 star = self._parse_star() 6871 expressions = ( 6872 [star] 6873 if star 6874 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6875 ) 6876 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6877 6878 unique_keys = None 6879 if self._match_text_seq("WITH", "UNIQUE"): 6880 unique_keys = True 6881 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6882 unique_keys = False 6883 6884 self._match_text_seq("KEYS") 6885 6886 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6887 self._parse_type() 6888 ) 6889 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6890 6891 return self.expression( 6892 exp.JSONObjectAgg if agg else exp.JSONObject, 6893 expressions=expressions, 6894 null_handling=null_handling, 6895 unique_keys=unique_keys, 6896 return_type=return_type, 6897 encoding=encoding, 6898 ) 6899 6900 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6901 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6902 if not self._match_text_seq("NESTED"): 6903 this = self._parse_id_var() 6904 kind = self._parse_types(allow_identifiers=False) 6905 nested = None 6906 else: 6907 this = None 6908 kind = None 6909 nested = True 6910 6911 path = self._match_text_seq("PATH") and self._parse_string() 6912 nested_schema = nested and self._parse_json_schema() 6913 6914 return self.expression( 6915 exp.JSONColumnDef, 6916 this=this, 6917 kind=kind, 6918 path=path, 6919 nested_schema=nested_schema, 6920 ) 6921 6922 def _parse_json_schema(self) -> exp.JSONSchema: 6923 self._match_text_seq("COLUMNS") 6924 return self.expression( 6925 exp.JSONSchema, 6926 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6927 ) 6928 6929 def _parse_json_table(self) -> exp.JSONTable: 6930 this = self._parse_format_json(self._parse_bitwise()) 6931 path = self._match(TokenType.COMMA) and self._parse_string() 6932 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6933 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6934 schema =
self._parse_json_schema() 6935 6936 return exp.JSONTable( 6937 this=this, 6938 schema=schema, 6939 path=path, 6940 error_handling=error_handling, 6941 empty_handling=empty_handling, 6942 ) 6943 6944 def _parse_match_against(self) -> exp.MatchAgainst: 6945 if self._match_text_seq("TABLE"): 6946 # parse SingleStore MATCH(TABLE ...) syntax 6947 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6948 expressions = [] 6949 table = self._parse_table() 6950 if table: 6951 expressions = [table] 6952 else: 6953 expressions = self._parse_csv(self._parse_column) 6954 6955 self._match_text_seq(")", "AGAINST", "(") 6956 6957 this = self._parse_string() 6958 6959 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6960 modifier = "IN NATURAL LANGUAGE MODE" 6961 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6962 modifier = f"{modifier} WITH QUERY EXPANSION" 6963 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6964 modifier = "IN BOOLEAN MODE" 6965 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6966 modifier = "WITH QUERY EXPANSION" 6967 else: 6968 modifier = None 6969 6970 return self.expression( 6971 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6972 ) 6973 6974 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6975 def _parse_open_json(self) -> exp.OpenJSON: 6976 this = self._parse_bitwise() 6977 path = self._match(TokenType.COMMA) and self._parse_string() 6978 6979 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6980 this = self._parse_field(any_token=True) 6981 kind = self._parse_types() 6982 path = self._parse_string() 6983 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6984 6985 return self.expression( 6986 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6987 ) 6988 6989 expressions = None 6990 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6991 self._match_l_paren() 6992 expressions = self._parse_csv(_parse_open_json_column_def) 6993 6994 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6995 6996 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6997 args = self._parse_csv(self._parse_bitwise) 6998 6999 if self._match(TokenType.IN): 7000 return self.expression( 7001 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7002 ) 7003 7004 if haystack_first: 7005 haystack = seq_get(args, 0) 7006 needle = seq_get(args, 1) 7007 else: 7008 haystack = seq_get(args, 1) 7009 needle = seq_get(args, 0) 7010 7011 return self.expression( 7012 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7013 ) 7014 7015 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7016 args = self._parse_csv(self._parse_table) 7017 return exp.JoinHint(this=func_name.upper(), expressions=args) 7018 7019 def _parse_substring(self) -> exp.Substring: 7020 # Postgres supports the form: substring(string [from int] [for int]) 7021 # (despite being undocumented, the reverse order also works) 7022 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7023 7024 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7025 7026 start, length = None, None 7027 7028 while self._curr: 7029 if self._match(TokenType.FROM): 7030 start = self._parse_bitwise() 7031 elif self._match(TokenType.FOR): 7032 if not start: 7033 start = exp.Literal.number(1) 7034 length = self._parse_bitwise() 7035 
else: 7036 break 7037 7038 if start: 7039 args.append(start) 7040 if length: 7041 args.append(length) 7042 7043 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7044 7045 def _parse_trim(self) -> exp.Trim: 7046 # https://www.w3resource.com/sql/character-functions/trim.php 7047 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7048 7049 position = None 7050 collation = None 7051 expression = None 7052 7053 if self._match_texts(self.TRIM_TYPES): 7054 position = self._prev.text.upper() 7055 7056 this = self._parse_bitwise() 7057 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7058 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7059 expression = self._parse_bitwise() 7060 7061 if invert_order: 7062 this, expression = expression, this 7063 7064 if self._match(TokenType.COLLATE): 7065 collation = self._parse_bitwise() 7066 7067 return self.expression( 7068 exp.Trim, this=this, position=position, expression=expression, collation=collation 7069 ) 7070 7071 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7072 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7073 7074 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7075 return self._parse_window(self._parse_id_var(), alias=True) 7076 7077 def _parse_respect_or_ignore_nulls( 7078 self, this: t.Optional[exp.Expression] 7079 ) -> t.Optional[exp.Expression]: 7080 if self._match_text_seq("IGNORE", "NULLS"): 7081 return self.expression(exp.IgnoreNulls, this=this) 7082 if self._match_text_seq("RESPECT", "NULLS"): 7083 return self.expression(exp.RespectNulls, this=this) 7084 return this 7085 7086 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7087 if self._match(TokenType.HAVING): 7088 self._match_texts(("MAX", "MIN")) 7089 max = self._prev.text.upper() != "MIN" 7090 return self.expression( 7091 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7092 ) 7093 7094 return this 7095 7096 def _parse_window( 7097 self, this: t.Optional[exp.Expression], alias: bool = False 7098 ) -> t.Optional[exp.Expression]: 7099 func = this 7100 comments = func.comments if isinstance(func, exp.Expression) else None 7101 7102 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7103 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7104 if self._match_text_seq("WITHIN", "GROUP"): 7105 order = self._parse_wrapped(self._parse_order) 7106 this = self.expression(exp.WithinGroup, this=this, expression=order) 7107 7108 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7109 self._match(TokenType.WHERE) 7110 this = self.expression( 7111 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7112 ) 7113 self._match_r_paren() 7114 7115 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7116 # Some dialects choose to implement it and some do not. 7117 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7118 7119 # There is some code above in _parse_lambda that handles 7120 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7121 7122 # The below changes handle 7123 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
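# For example, both spellings should normalize to the same tree (a sketch
# assuming the public sqlglot.parse_one API):
#
#   import sqlglot
#   a = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
#   b = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
#   assert a.sql() == b.sql()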
7124 7125 # Oracle allows both formats 7126 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7127 # and Snowflake chose to do the same for familiarity 7128 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7129 if isinstance(this, exp.AggFunc): 7130 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7131 7132 if ignore_respect and ignore_respect is not this: 7133 ignore_respect.replace(ignore_respect.this) 7134 this = self.expression(ignore_respect.__class__, this=this) 7135 7136 this = self._parse_respect_or_ignore_nulls(this) 7137 7138 # bigquery select from window x AS (partition by ...) 7139 if alias: 7140 over = None 7141 self._match(TokenType.ALIAS) 7142 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7143 return this 7144 else: 7145 over = self._prev.text.upper() 7146 7147 if comments and isinstance(func, exp.Expression): 7148 func.pop_comments() 7149 7150 if not self._match(TokenType.L_PAREN): 7151 return self.expression( 7152 exp.Window, 7153 comments=comments, 7154 this=this, 7155 alias=self._parse_id_var(False), 7156 over=over, 7157 ) 7158 7159 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7160 7161 first = self._match(TokenType.FIRST) 7162 if self._match_text_seq("LAST"): 7163 first = False 7164 7165 partition, order = self._parse_partition_and_order() 7166 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7167 7168 if kind: 7169 self._match(TokenType.BETWEEN) 7170 start = self._parse_window_spec() 7171 7172 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7173 exclude = ( 7174 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7175 if self._match_text_seq("EXCLUDE") 7176 else None 7177 ) 7178 7179 spec = self.expression( 7180 exp.WindowSpec, 7181 kind=kind, 7182 start=start["value"], 7183 start_side=start["side"], 7184 end=end.get("value"), 7185 end_side=end.get("side"), 7186 exclude=exclude, 7187 ) 7188 else: 7189 spec = None 7190 7191 self._match_r_paren() 7192 7193 window = self.expression( 7194 exp.Window, 7195 comments=comments, 7196 this=this, 7197 partition_by=partition, 7198 order=order, 7199 spec=spec, 7200 alias=window_alias, 7201 over=over, 7202 first=first, 7203 ) 7204 7205 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
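# For example, in Oracle's MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z),
# the KEEP (...) part is parsed into `window` first and the recursive call below then
# consumes the trailing OVER (...). A sketch of the expected shape (the exact node
# layout is an assumption, not verified output):
#
#   import sqlglot
#   select = sqlglot.parse_one(
#       "SELECT MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z) FROM t",
#       read="oracle",
#   )
#   outer = select.selects[0]  # exp.Window for the OVER clause
#   inner = outer.this         # exp.Window for the KEEP clause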
7206 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7207 return self._parse_window(window, alias=alias) 7208 7209 return window 7210 7211 def _parse_partition_and_order( 7212 self, 7213 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7214 return self._parse_partition_by(), self._parse_order() 7215 7216 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7217 self._match(TokenType.BETWEEN) 7218 7219 return { 7220 "value": ( 7221 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7222 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7223 or self._parse_type() 7224 ), 7225 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7226 } 7227 7228 def _parse_alias( 7229 self, this: t.Optional[exp.Expression], explicit: bool = False 7230 ) -> t.Optional[exp.Expression]: 7231 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7232 # so this section tries to parse the clause version and if it fails, it treats the token 7233 # as an identifier (alias) 7234 if self._can_parse_limit_or_offset(): 7235 return this 7236 7237 any_token = self._match(TokenType.ALIAS) 7238 comments = self._prev_comments or [] 7239 7240 if explicit and not any_token: 7241 return this 7242 7243 if self._match(TokenType.L_PAREN): 7244 aliases = self.expression( 7245 exp.Aliases, 7246 comments=comments, 7247 this=this, 7248 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7249 ) 7250 self._match_r_paren(aliases) 7251 return aliases 7252 7253 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7254 self.STRING_ALIASES and self._parse_string_as_identifier() 7255 ) 7256 7257 if alias: 7258 comments.extend(alias.pop_comments()) 7259 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7260 column = this.this 7261 7262 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7263 if not this.comments and column and column.comments: 7264 this.comments = column.pop_comments() 7265 7266 return this 7267 7268 def _parse_id_var( 7269 self, 7270 any_token: bool = True, 7271 tokens: t.Optional[t.Collection[TokenType]] = None, 7272 ) -> t.Optional[exp.Expression]: 7273 expression = self._parse_identifier() 7274 if not expression and ( 7275 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7276 ): 7277 quoted = self._prev.token_type == TokenType.STRING 7278 expression = self._identifier_expression(quoted=quoted) 7279 7280 return expression 7281 7282 def _parse_string(self) -> t.Optional[exp.Expression]: 7283 if self._match_set(self.STRING_PARSERS): 7284 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7285 return self._parse_placeholder() 7286 7287 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7288 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7289 if output: 7290 output.update_positions(self._prev) 7291 return output 7292 7293 def _parse_number(self) -> t.Optional[exp.Expression]: 7294 if self._match_set(self.NUMERIC_PARSERS): 7295 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7296 return self._parse_placeholder() 7297 7298 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7299 if self._match(TokenType.IDENTIFIER): 7300 return self._identifier_expression(quoted=True) 7301 return self._parse_placeholder() 7302 7303 def _parse_var( 7304 self, 7305 any_token: bool = False, 7306 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7307 upper: bool = False, 7308 ) -> t.Optional[exp.Expression]: 7309 if ( 7310 (any_token and self._advance_any()) 7311 or self._match(TokenType.VAR) 7312 or (self._match_set(tokens) if tokens else False) 7313 ): 7314 return self.expression( 7315 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7316 ) 7317 return self._parse_placeholder() 7318 7319 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7320 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7321 self._advance() 7322 return self._prev 7323 return None 7324 7325 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7326 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7327 7328 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7329 return self._parse_primary() or self._parse_var(any_token=True) 7330 7331 def _parse_null(self) -> t.Optional[exp.Expression]: 7332 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7333 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7334 return self._parse_placeholder() 7335 7336 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7337 if self._match(TokenType.TRUE): 7338 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7339 if self._match(TokenType.FALSE): 7340 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7341 return self._parse_placeholder() 7342 7343 def _parse_star(self) -> t.Optional[exp.Expression]: 7344 if self._match(TokenType.STAR): 7345 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7346 return self._parse_placeholder() 7347 7348 def _parse_parameter(self) -> exp.Parameter: 7349 this = self._parse_identifier() or self._parse_primary_or_var() 7350 return self.expression(exp.Parameter, this=this) 7351 7352 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7353 if self._match_set(self.PLACEHOLDER_PARSERS): 7354 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7355 if placeholder: 7356 return placeholder 7357 self._advance(-1) 7358 return None 7359 7360 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7361 if not self._match_texts(keywords): 7362 return None 7363 if self._match(TokenType.L_PAREN, advance=False): 7364 return self._parse_wrapped_csv(self._parse_expression) 7365 7366 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7367 return [expression] if expression else None 7368 7369 def _parse_csv( 7370 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7371 ) -> t.List[exp.Expression]: 7372 parse_result = parse_method() 7373 items = [parse_result] if parse_result is not None else [] 7374 7375 while self._match(sep): 7376 self._add_comments(parse_result) 7377 parse_result = parse_method() 7378 if parse_result is not None: 7379 items.append(parse_result) 7380 7381 return items 7382 7383 def _parse_tokens( 7384 self, parse_method: t.Callable, expressions: t.Dict 7385 ) -> t.Optional[exp.Expression]: 7386 this = parse_method() 7387 7388 while self._match_set(expressions): 7389 this = self.expression( 7390 expressions[self._prev.token_type], 7391 this=this, 7392 comments=self._prev_comments, 7393 expression=parse_method(), 7394 ) 7395 7396 return this 7397 7398 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7399 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7400 7401 def 
_parse_wrapped_csv( 7402 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7403 ) -> t.List[exp.Expression]: 7404 return self._parse_wrapped( 7405 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7406 ) 7407 7408 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7409 wrapped = self._match(TokenType.L_PAREN) 7410 if not wrapped and not optional: 7411 self.raise_error("Expecting (") 7412 parse_result = parse_method() 7413 if wrapped: 7414 self._match_r_paren() 7415 return parse_result 7416 7417 def _parse_expressions(self) -> t.List[exp.Expression]: 7418 return self._parse_csv(self._parse_expression) 7419 7420 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7421 return ( 7422 self._parse_set_operations( 7423 self._parse_alias(self._parse_assignment(), explicit=True) 7424 if alias 7425 else self._parse_assignment() 7426 ) 7427 or self._parse_select() 7428 ) 7429 7430 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7431 return self._parse_query_modifiers( 7432 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7433 ) 7434 7435 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7436 this = None 7437 if self._match_texts(self.TRANSACTION_KIND): 7438 this = self._prev.text 7439 7440 self._match_texts(("TRANSACTION", "WORK")) 7441 7442 modes = [] 7443 while True: 7444 mode = [] 7445 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7446 mode.append(self._prev.text) 7447 7448 if mode: 7449 modes.append(" ".join(mode)) 7450 if not self._match(TokenType.COMMA): 7451 break 7452 7453 return self.expression(exp.Transaction, this=this, modes=modes) 7454 7455 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7456 chain = None 7457 savepoint = None 7458 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7459 7460 self._match_texts(("TRANSACTION", "WORK")) 7461 7462 if self._match_text_seq("TO"): 7463 self._match_text_seq("SAVEPOINT") 7464 savepoint = self._parse_id_var() 7465 7466 if self._match(TokenType.AND): 7467 chain = not self._match_text_seq("NO") 7468 self._match_text_seq("CHAIN") 7469 7470 if is_rollback: 7471 return self.expression(exp.Rollback, savepoint=savepoint) 7472 7473 return self.expression(exp.Commit, chain=chain) 7474 7475 def _parse_refresh(self) -> exp.Refresh: 7476 self._match(TokenType.TABLE) 7477 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7478 7479 def _parse_column_def_with_exists(self): 7480 start = self._index 7481 self._match(TokenType.COLUMN) 7482 7483 exists_column = self._parse_exists(not_=True) 7484 expression = self._parse_field_def() 7485 7486 if not isinstance(expression, exp.ColumnDef): 7487 self._retreat(start) 7488 return None 7489 7490 expression.set("exists", exists_column) 7491 7492 return expression 7493 7494 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7495 if not self._prev.text.upper() == "ADD": 7496 return None 7497 7498 expression = self._parse_column_def_with_exists() 7499 if not expression: 7500 return None 7501 7502 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7503 if self._match_texts(("FIRST", "AFTER")): 7504 position = self._prev.text 7505 column_position = self.expression( 7506 exp.ColumnPosition, this=self._parse_column(), position=position 7507 ) 7508 expression.set("position", column_position) 7509 7510 return 
expression 7511 7512 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7513 drop = self._match(TokenType.DROP) and self._parse_drop() 7514 if drop and not isinstance(drop, exp.Command): 7515 drop.set("kind", drop.args.get("kind", "COLUMN")) 7516 return drop 7517 7518 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7519 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7520 return self.expression( 7521 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7522 ) 7523 7524 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7525 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7526 self._match_text_seq("ADD") 7527 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7528 return self.expression( 7529 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7530 ) 7531 7532 column_def = self._parse_add_column() 7533 if isinstance(column_def, exp.ColumnDef): 7534 return column_def 7535 7536 exists = self._parse_exists(not_=True) 7537 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7538 return self.expression( 7539 exp.AddPartition, 7540 exists=exists, 7541 this=self._parse_field(any_token=True), 7542 location=self._match_text_seq("LOCATION", advance=False) 7543 and self._parse_property(), 7544 ) 7545 7546 return None 7547 7548 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7549 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7550 or self._match_text_seq("COLUMNS") 7551 ): 7552 schema = self._parse_schema() 7553 7554 return ( 7555 ensure_list(schema) 7556 if schema 7557 else self._parse_csv(self._parse_column_def_with_exists) 7558 ) 7559 7560 return self._parse_csv(_parse_add_alteration) 7561 7562 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7563 if self._match_texts(self.ALTER_ALTER_PARSERS): 7564 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7565 7566 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7567 # keyword after ALTER we default to parsing this statement 7568 self._match(TokenType.COLUMN) 7569 column = self._parse_field(any_token=True) 7570 7571 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7572 return self.expression(exp.AlterColumn, this=column, drop=True) 7573 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7574 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7575 if self._match(TokenType.COMMENT): 7576 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7577 if self._match_text_seq("DROP", "NOT", "NULL"): 7578 return self.expression( 7579 exp.AlterColumn, 7580 this=column, 7581 drop=True, 7582 allow_null=True, 7583 ) 7584 if self._match_text_seq("SET", "NOT", "NULL"): 7585 return self.expression( 7586 exp.AlterColumn, 7587 this=column, 7588 allow_null=False, 7589 ) 7590 7591 if self._match_text_seq("SET", "VISIBLE"): 7592 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7593 if self._match_text_seq("SET", "INVISIBLE"): 7594 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7595 7596 self._match_text_seq("SET", "DATA") 7597 self._match_text_seq("TYPE") 7598 return self.expression( 7599 exp.AlterColumn, 7600 this=column, 7601 dtype=self._parse_types(), 7602 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7603 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7604 ) 7605 7606 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7607 if self._match_texts(("ALL", "EVEN", "AUTO")): 7608 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7609 7610 self._match_text_seq("KEY", "DISTKEY") 7611 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7612 7613 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7614 if compound: 7615 self._match_text_seq("SORTKEY") 7616 7617 if self._match(TokenType.L_PAREN, advance=False): 7618 return self.expression( 7619 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7620 ) 7621 7622 self._match_texts(("AUTO", "NONE")) 7623 return self.expression( 7624 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7625 ) 7626 7627 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7628 index = self._index - 1 7629 7630 partition_exists = self._parse_exists() 7631 if self._match(TokenType.PARTITION, advance=False): 7632 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7633 7634 self._retreat(index) 7635 return self._parse_csv(self._parse_drop_column) 7636 7637 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7638 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7639 exists = self._parse_exists() 7640 old_column = self._parse_column() 7641 to = self._match_text_seq("TO") 7642 new_column = self._parse_column() 7643 7644 if old_column is None or to is None or new_column is None: 7645 return None 7646 7647 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7648 7649 self._match_text_seq("TO") 7650 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7651 7652 def _parse_alter_table_set(self) -> exp.AlterSet: 7653 alter_set = self.expression(exp.AlterSet) 7654 7655 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7656 "TABLE", "PROPERTIES" 7657 ): 7658 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7659 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7660 alter_set.set("expressions", [self._parse_assignment()]) 7661 elif self._match_texts(("LOGGED", "UNLOGGED")): 7662 alter_set.set("option", exp.var(self._prev.text.upper())) 7663 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7664 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7665 elif self._match_text_seq("LOCATION"): 7666 alter_set.set("location", self._parse_field()) 7667 elif self._match_text_seq("ACCESS", "METHOD"): 7668 alter_set.set("access_method", self._parse_field()) 7669 elif self._match_text_seq("TABLESPACE"): 7670 alter_set.set("tablespace", self._parse_field()) 7671 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7672 alter_set.set("file_format", [self._parse_field()]) 7673 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7674 alter_set.set("file_format", self._parse_wrapped_options()) 7675 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7676 alter_set.set("copy_options", self._parse_wrapped_options()) 7677 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7678 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7679 else: 7680 if self._match_text_seq("SERDE"): 7681 alter_set.set("serde", self._parse_field()) 7682 7683 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7684 alter_set.set("expressions", [properties]) 7685 7686 return alter_set 7687 7688 def _parse_alter_session(self) -> exp.AlterSession: 7689 """Parse ALTER SESSION SET/UNSET statements.""" 7690 if self._match(TokenType.SET): 7691 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7692 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7693 7694 self._match_text_seq("UNSET") 7695 expressions = self._parse_csv( 7696 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7697 ) 7698 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7699 7700 def _parse_alter(self) -> exp.Alter | exp.Command: 7701 start = self._prev 7702 7703 alter_token = self._match_set(self.ALTERABLES) and self._prev 7704 if not alter_token: 7705 return self._parse_as_command(start) 7706 7707 exists = self._parse_exists() 7708 only = self._match_text_seq("ONLY") 7709 7710 if alter_token.token_type == TokenType.SESSION: 7711 this = None 7712 check = None 7713 cluster = None 7714 else: 7715 this = self._parse_table(schema=True) 7716 check = self._match_text_seq("WITH", "CHECK") 7717 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7718 7719 if self._next: 7720 self._advance() 7721 7722 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7723 if parser: 7724 actions = ensure_list(parser(self)) 7725 not_valid = self._match_text_seq("NOT", "VALID") 7726 options = self._parse_csv(self._parse_property) 7727 7728 if not self._curr and actions: 7729 return self.expression( 7730 exp.Alter, 7731 this=this, 7732 kind=alter_token.text.upper(), 7733 exists=exists, 7734 actions=actions, 7735 only=only, 7736 options=options, 7737 cluster=cluster, 7738 not_valid=not_valid, 7739 check=check, 7740 ) 7741 7742 return self._parse_as_command(start) 7743 7744 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7745 start = self._prev 7746 # https://duckdb.org/docs/sql/statements/analyze 7747 if not self._curr: 7748 return self.expression(exp.Analyze) 7749 7750 options = [] 7751 while self._match_texts(self.ANALYZE_STYLES): 7752 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7753 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7754 else: 7755 options.append(self._prev.text.upper()) 7756 7757 this: t.Optional[exp.Expression] = None 7758 inner_expression: t.Optional[exp.Expression] = None 7759 7760 kind = self._curr and self._curr.text.upper() 7761 7762 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7763 this = self._parse_table_parts() 7764 elif self._match_text_seq("TABLES"): 7765 if self._match_set((TokenType.FROM, TokenType.IN)): 7766 kind = f"{kind} {self._prev.text.upper()}" 7767 this = self._parse_table(schema=True, is_db_reference=True) 7768 elif self._match_text_seq("DATABASE"): 7769 this = self._parse_table(schema=True, is_db_reference=True) 7770 elif self._match_text_seq("CLUSTER"): 7771 this = self._parse_table() 7772 # Try matching inner expr keywords before fallback to parse table. 
7773 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7774 kind = None 7775 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7776 else: 7777 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7778 kind = None 7779 this = self._parse_table_parts() 7780 7781 partition = self._try_parse(self._parse_partition) 7782 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7783 return self._parse_as_command(start) 7784 7785 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7786 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7787 "WITH", "ASYNC", "MODE" 7788 ): 7789 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7790 else: 7791 mode = None 7792 7793 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7794 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7795 7796 properties = self._parse_properties() 7797 return self.expression( 7798 exp.Analyze, 7799 kind=kind, 7800 this=this, 7801 mode=mode, 7802 partition=partition, 7803 properties=properties, 7804 expression=inner_expression, 7805 options=options, 7806 ) 7807 7808 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7809 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7810 this = None 7811 kind = self._prev.text.upper() 7812 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7813 expressions = [] 7814 7815 if not self._match_text_seq("STATISTICS"): 7816 self.raise_error("Expecting token STATISTICS") 7817 7818 if self._match_text_seq("NOSCAN"): 7819 this = "NOSCAN" 7820 elif self._match(TokenType.FOR): 7821 if self._match_text_seq("ALL", "COLUMNS"): 7822 this = "FOR ALL COLUMNS" 7823 if self._match_texts(("COLUMNS",)): 7824 this = "FOR COLUMNS" 7825 expressions = self._parse_csv(self._parse_column_reference) 7826 elif self._match_text_seq("SAMPLE"): 7827 sample = self._parse_number() 7828 expressions = [ 7829 self.expression( 7830 exp.AnalyzeSample, 7831 sample=sample, 7832 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7833 ) 7834 ] 7835 7836 return self.expression( 7837 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7838 ) 7839 7840 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7841 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7842 kind = None 7843 this = None 7844 expression: t.Optional[exp.Expression] = None 7845 if self._match_text_seq("REF", "UPDATE"): 7846 kind = "REF" 7847 this = "UPDATE" 7848 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7849 this = "UPDATE SET DANGLING TO NULL" 7850 elif self._match_text_seq("STRUCTURE"): 7851 kind = "STRUCTURE" 7852 if self._match_text_seq("CASCADE", "FAST"): 7853 this = "CASCADE FAST" 7854 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7855 ("ONLINE", "OFFLINE") 7856 ): 7857 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7858 expression = self._parse_into() 7859 7860 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7861 7862 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7863 this = self._prev.text.upper() 7864 if self._match_text_seq("COLUMNS"): 7865 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7866 return None 7867 7868 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7869 kind =
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7870 if self._match_text_seq("STATISTICS"): 7871 return self.expression(exp.AnalyzeDelete, kind=kind) 7872 return None 7873 7874 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7875 if self._match_text_seq("CHAINED", "ROWS"): 7876 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7877 return None 7878 7879 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7880 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7881 this = self._prev.text.upper() 7882 expression: t.Optional[exp.Expression] = None 7883 expressions = [] 7884 update_options = None 7885 7886 if self._match_text_seq("HISTOGRAM", "ON"): 7887 expressions = self._parse_csv(self._parse_column_reference) 7888 with_expressions = [] 7889 while self._match(TokenType.WITH): 7890 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7891 if self._match_texts(("SYNC", "ASYNC")): 7892 if self._match_text_seq("MODE", advance=False): 7893 with_expressions.append(f"{self._prev.text.upper()} MODE") 7894 self._advance() 7895 else: 7896 buckets = self._parse_number() 7897 if self._match_text_seq("BUCKETS"): 7898 with_expressions.append(f"{buckets} BUCKETS") 7899 if with_expressions: 7900 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7901 7902 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7903 TokenType.UPDATE, advance=False 7904 ): 7905 update_options = self._prev.text.upper() 7906 self._advance() 7907 elif self._match_text_seq("USING", "DATA"): 7908 expression = self.expression(exp.UsingData, this=self._parse_string()) 7909 7910 return self.expression( 7911 exp.AnalyzeHistogram, 7912 this=this, 7913 expressions=expressions, 7914 expression=expression, 7915 update_options=update_options, 7916 ) 7917 7918 def _parse_merge(self) -> exp.Merge: 7919 self._match(TokenType.INTO) 7920 target = self._parse_table() 7921 7922 if target and self._match(TokenType.ALIAS, advance=False): 7923 target.set("alias", self._parse_table_alias()) 7924 7925 self._match(TokenType.USING) 7926 using = self._parse_table() 7927 7928 self._match(TokenType.ON) 7929 on = self._parse_assignment() 7930 7931 return self.expression( 7932 exp.Merge, 7933 this=target, 7934 using=using, 7935 on=on, 7936 whens=self._parse_when_matched(), 7937 returning=self._parse_returning(), 7938 ) 7939 7940 def _parse_when_matched(self) -> exp.Whens: 7941 whens = [] 7942 7943 while self._match(TokenType.WHEN): 7944 matched = not self._match(TokenType.NOT) 7945 self._match_text_seq("MATCHED") 7946 source = ( 7947 False 7948 if self._match_text_seq("BY", "TARGET") 7949 else self._match_text_seq("BY", "SOURCE") 7950 ) 7951 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7952 7953 self._match(TokenType.THEN) 7954 7955 if self._match(TokenType.INSERT): 7956 this = self._parse_star() 7957 if this: 7958 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7959 else: 7960 then = self.expression( 7961 exp.Insert, 7962 this=exp.var("ROW") 7963 if self._match_text_seq("ROW") 7964 else self._parse_value(values=False), 7965 expression=self._match_text_seq("VALUES") and self._parse_value(), 7966 ) 7967 elif self._match(TokenType.UPDATE): 7968 expressions = self._parse_star() 7969 if expressions: 7970 then = self.expression(exp.Update, expressions=expressions) 7971 else: 7972 then = self.expression( 7973 exp.Update, 7974 
expressions=self._match(TokenType.SET) 7975 and self._parse_csv(self._parse_equality), 7976 ) 7977 elif self._match(TokenType.DELETE): 7978 then = self.expression(exp.Var, this=self._prev.text) 7979 else: 7980 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7981 7982 whens.append( 7983 self.expression( 7984 exp.When, 7985 matched=matched, 7986 source=source, 7987 condition=condition, 7988 then=then, 7989 ) 7990 ) 7991 return self.expression(exp.Whens, expressions=whens) 7992 7993 def _parse_show(self) -> t.Optional[exp.Expression]: 7994 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7995 if parser: 7996 return parser(self) 7997 return self._parse_as_command(self._prev) 7998 7999 def _parse_set_item_assignment( 8000 self, kind: t.Optional[str] = None 8001 ) -> t.Optional[exp.Expression]: 8002 index = self._index 8003 8004 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8005 return self._parse_set_transaction(global_=kind == "GLOBAL") 8006 8007 left = self._parse_primary() or self._parse_column() 8008 assignment_delimiter = self._match_texts(("=", "TO")) 8009 8010 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8011 self._retreat(index) 8012 return None 8013 8014 right = self._parse_statement() or self._parse_id_var() 8015 if isinstance(right, (exp.Column, exp.Identifier)): 8016 right = exp.var(right.name) 8017 8018 this = self.expression(exp.EQ, this=left, expression=right) 8019 return self.expression(exp.SetItem, this=this, kind=kind) 8020 8021 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8022 self._match_text_seq("TRANSACTION") 8023 characteristics = self._parse_csv( 8024 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8025 ) 8026 return self.expression( 8027 exp.SetItem, 8028 expressions=characteristics, 8029 kind="TRANSACTION", 8030 **{"global": global_}, # type: ignore 8031 ) 8032 8033 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8034 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8035 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8036 8037 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8038 index = self._index 8039 set_ = self.expression( 8040 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8041 ) 8042 8043 if self._curr: 8044 self._retreat(index) 8045 return self._parse_as_command(self._prev) 8046 8047 return set_ 8048 8049 def _parse_var_from_options( 8050 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8051 ) -> t.Optional[exp.Var]: 8052 start = self._curr 8053 if not start: 8054 return None 8055 8056 option = start.text.upper() 8057 continuations = options.get(option) 8058 8059 index = self._index 8060 self._advance() 8061 for keywords in continuations or []: 8062 if isinstance(keywords, str): 8063 keywords = (keywords,) 8064 8065 if self._match_text_seq(*keywords): 8066 option = f"{option} {' '.join(keywords)}" 8067 break 8068 else: 8069 if continuations or continuations is None: 8070 if raise_unmatched: 8071 self.raise_error(f"Unknown option {option}") 8072 8073 self._retreat(index) 8074 return None 8075 8076 return exp.var(option) 8077 8078 def _parse_as_command(self, start: Token) -> exp.Command: 8079 while self._curr: 8080 self._advance() 8081 text = self._find_sql(start, self._prev) 8082 size = len(start.text) 8083 self._warn_unsupported() 8084 return exp.Command(this=text[:size], 
expression=text[size:]) 8085 8086 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8087 settings = [] 8088 8089 self._match_l_paren() 8090 kind = self._parse_id_var() 8091 8092 if self._match(TokenType.L_PAREN): 8093 while True: 8094 key = self._parse_id_var() 8095 value = self._parse_primary() 8096 if not key and value is None: 8097 break 8098 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8099 self._match(TokenType.R_PAREN) 8100 8101 self._match_r_paren() 8102 8103 return self.expression( 8104 exp.DictProperty, 8105 this=this, 8106 kind=kind.this if kind else None, 8107 settings=settings, 8108 ) 8109 8110 def _parse_dict_range(self, this: str) -> exp.DictRange: 8111 self._match_l_paren() 8112 has_min = self._match_text_seq("MIN") 8113 if has_min: 8114 min = self._parse_var() or self._parse_primary() 8115 self._match_text_seq("MAX") 8116 max = self._parse_var() or self._parse_primary() 8117 else: 8118 max = self._parse_var() or self._parse_primary() 8119 min = exp.Literal.number(0) 8120 self._match_r_paren() 8121 return self.expression(exp.DictRange, this=this, min=min, max=max) 8122 8123 def _parse_comprehension( 8124 self, this: t.Optional[exp.Expression] 8125 ) -> t.Optional[exp.Comprehension]: 8126 index = self._index 8127 expression = self._parse_column() 8128 if not self._match(TokenType.IN): 8129 self._retreat(index - 1) 8130 return None 8131 iterator = self._parse_column() 8132 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8133 return self.expression( 8134 exp.Comprehension, 8135 this=this, 8136 expression=expression, 8137 iterator=iterator, 8138 condition=condition, 8139 ) 8140 8141 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8142 if self._match(TokenType.HEREDOC_STRING): 8143 return self.expression(exp.Heredoc, this=self._prev.text) 8144 8145 if not self._match_text_seq("$"): 8146 return None 8147 8148 tags = ["$"] 8149 tag_text = None 8150 8151 if self._is_connected(): 8152 self._advance() 8153 tags.append(self._prev.text.upper()) 8154 else: 8155 self.raise_error("No closing $ found") 8156 8157 if tags[-1] != "$": 8158 if self._is_connected() and self._match_text_seq("$"): 8159 tag_text = tags[-1] 8160 tags.append("$") 8161 else: 8162 self.raise_error("No closing $ found") 8163 8164 heredoc_start = self._curr 8165 8166 while self._curr: 8167 if self._match_text_seq(*tags, advance=False): 8168 this = self._find_sql(heredoc_start, self._prev) 8169 self._advance(len(tags)) 8170 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8171 8172 self._advance() 8173 8174 self.raise_error(f"No closing {''.join(tags)} found") 8175 return None 8176 8177 def _find_parser( 8178 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8179 ) -> t.Optional[t.Callable]: 8180 if not self._curr: 8181 return None 8182 8183 index = self._index 8184 this = [] 8185 while True: 8186 # The current token might be multiple words 8187 curr = self._curr.text.upper() 8188 key = curr.split(" ") 8189 this.append(curr) 8190 8191 self._advance() 8192 result, trie = in_trie(trie, key) 8193 if result == TrieResult.FAILED: 8194 break 8195 8196 if result == TrieResult.EXISTS: 8197 subparser = parsers[" ".join(this)] 8198 return subparser 8199 8200 self._retreat(index) 8201 return None 8202 8203 def _match(self, token_type, advance=True, expression=None): 8204 if not self._curr: 8205 return None 8206 8207 if self._curr.token_type == token_type: 8208 if advance: 8209 self._advance() 8210 self._add_comments(expression) 8211 return 
True 8212 8213 return None 8214 8215 def _match_set(self, types, advance=True): 8216 if not self._curr: 8217 return None 8218 8219 if self._curr.token_type in types: 8220 if advance: 8221 self._advance() 8222 return True 8223 8224 return None 8225 8226 def _match_pair(self, token_type_a, token_type_b, advance=True): 8227 if not self._curr or not self._next: 8228 return None 8229 8230 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8231 if advance: 8232 self._advance(2) 8233 return True 8234 8235 return None 8236 8237 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8238 if not self._match(TokenType.L_PAREN, expression=expression): 8239 self.raise_error("Expecting (") 8240 8241 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8242 if not self._match(TokenType.R_PAREN, expression=expression): 8243 self.raise_error("Expecting )") 8244 8245 def _match_texts(self, texts, advance=True): 8246 if ( 8247 self._curr 8248 and self._curr.token_type != TokenType.STRING 8249 and self._curr.text.upper() in texts 8250 ): 8251 if advance: 8252 self._advance() 8253 return True 8254 return None 8255 8256 def _match_text_seq(self, *texts, advance=True): 8257 index = self._index 8258 for text in texts: 8259 if ( 8260 self._curr 8261 and self._curr.token_type != TokenType.STRING 8262 and self._curr.text.upper() == text 8263 ): 8264 self._advance() 8265 else: 8266 self._retreat(index) 8267 return None 8268 8269 if not advance: 8270 self._retreat(index) 8271 8272 return True 8273 8274 def _replace_lambda( 8275 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8276 ) -> t.Optional[exp.Expression]: 8277 if not node: 8278 return node 8279 8280 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8281 8282 for column in node.find_all(exp.Column): 8283 typ = lambda_types.get(column.parts[0].name) 8284 if typ is not None: 8285 dot_or_id = column.to_dot() if column.table else column.this 8286 8287 if typ: 8288 dot_or_id = self.expression( 8289 exp.Cast, 8290 this=dot_or_id, 8291 to=typ, 8292 ) 8293 8294 parent = column.parent 8295 8296 while isinstance(parent, exp.Dot): 8297 if not isinstance(parent.parent, exp.Dot): 8298 parent.replace(dot_or_id) 8299 break 8300 parent = parent.parent 8301 else: 8302 if column is node: 8303 node = dot_or_id 8304 else: 8305 column.replace(dot_or_id) 8306 return node 8307 8308 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8309 start = self._prev 8310 8311 # Not to be confused with TRUNCATE(number, decimals) function call 8312 if self._match(TokenType.L_PAREN): 8313 self._retreat(self._index - 2) 8314 return self._parse_function() 8315 8316 # Clickhouse supports TRUNCATE DATABASE as well 8317 is_database = self._match(TokenType.DATABASE) 8318 8319 self._match(TokenType.TABLE) 8320 8321 exists = self._parse_exists(not_=False) 8322 8323 expressions = self._parse_csv( 8324 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8325 ) 8326 8327 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8328 8329 if self._match_text_seq("RESTART", "IDENTITY"): 8330 identity = "RESTART" 8331 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8332 identity = "CONTINUE" 8333 else: 8334 identity = None 8335 8336 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8337 option = self._prev.text 8338 else: 8339 option = None 8340 8341 partition = self._parse_partition() 
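# By this point a statement such as TRUNCATE TABLE IF EXISTS t1, t2 RESTART IDENTITY CASCADE
# has been consumed in full, e.g. (a sketch using the public API):
#
#   import sqlglot
#   sqlglot.parse_one("TRUNCATE TABLE t1, t2 CASCADE")  # -> exp.TruncateTable
#
# Any leftover tokens mean the tail wasn't understood, so the whole statement is
# preserved verbatim as a generic exp.Command by the fallback below.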
8342 8343 # Fallback case 8344 if self._curr: 8345 return self._parse_as_command(start) 8346 8347 return self.expression( 8348 exp.TruncateTable, 8349 expressions=expressions, 8350 is_database=is_database, 8351 exists=exists, 8352 cluster=cluster, 8353 identity=identity, 8354 option=option, 8355 partition=partition, 8356 ) 8357 8358 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8359 this = self._parse_ordered(self._parse_opclass) 8360 8361 if not self._match(TokenType.WITH): 8362 return this 8363 8364 op = self._parse_var(any_token=True) 8365 8366 return self.expression(exp.WithOperator, this=this, op=op) 8367 8368 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8369 self._match(TokenType.EQ) 8370 self._match(TokenType.L_PAREN) 8371 8372 opts: t.List[t.Optional[exp.Expression]] = [] 8373 option: exp.Expression | None 8374 while self._curr and not self._match(TokenType.R_PAREN): 8375 if self._match_text_seq("FORMAT_NAME", "="): 8376 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8377 option = self._parse_format_name() 8378 else: 8379 option = self._parse_property() 8380 8381 if option is None: 8382 self.raise_error("Unable to parse option") 8383 break 8384 8385 opts.append(option) 8386 8387 return opts 8388 8389 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8390 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8391 8392 options = [] 8393 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8394 option = self._parse_var(any_token=True) 8395 prev = self._prev.text.upper() 8396 8397 # Different dialects might separate options and values by white space, "=" and "AS" 8398 self._match(TokenType.EQ) 8399 self._match(TokenType.ALIAS) 8400 8401 param = self.expression(exp.CopyParameter, this=option) 8402 8403 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8404 TokenType.L_PAREN, advance=False 8405 ): 8406 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8407 param.set("expressions", self._parse_wrapped_options()) 8408 elif prev == "FILE_FORMAT": 8409 # T-SQL's external file format case 8410 param.set("expression", self._parse_field()) 8411 else: 8412 param.set("expression", self._parse_unquoted_field()) 8413 8414 options.append(param) 8415 self._match(sep) 8416 8417 return options 8418 8419 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8420 expr = self.expression(exp.Credentials) 8421 8422 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8423 expr.set("storage", self._parse_field()) 8424 if self._match_text_seq("CREDENTIALS"): 8425 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8426 creds = ( 8427 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8428 ) 8429 expr.set("credentials", creds) 8430 if self._match_text_seq("ENCRYPTION"): 8431 expr.set("encryption", self._parse_wrapped_options()) 8432 if self._match_text_seq("IAM_ROLE"): 8433 expr.set("iam_role", self._parse_field()) 8434 if self._match_text_seq("REGION"): 8435 expr.set("region", self._parse_field()) 8436 8437 return expr 8438 8439 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8440 return self._parse_field() 8441 8442 def _parse_copy(self) -> exp.Copy | exp.Command: 8443 start = self._prev 8444 8445 self._match(TokenType.INTO) 8446 8447 this = ( 8448 self._parse_select(nested=True, parse_subquery_alias=False) 8449 if self._match(TokenType.L_PAREN, advance=False) 8450 else self._parse_table(schema=True) 
8451 ) 8452 8453 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8454 8455 files = self._parse_csv(self._parse_file_location) 8456 if self._match(TokenType.EQ, advance=False): 8457 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8458 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8459 # list via `_parse_wrapped(..)` below. 8460 self._advance(-1) 8461 files = [] 8462 8463 credentials = self._parse_credentials() 8464 8465 self._match_text_seq("WITH") 8466 8467 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8468 8469 # Fallback case 8470 if self._curr: 8471 return self._parse_as_command(start) 8472 8473 return self.expression( 8474 exp.Copy, 8475 this=this, 8476 kind=kind, 8477 credentials=credentials, 8478 files=files, 8479 params=params, 8480 ) 8481 8482 def _parse_normalize(self) -> exp.Normalize: 8483 return self.expression( 8484 exp.Normalize, 8485 this=self._parse_bitwise(), 8486 form=self._match(TokenType.COMMA) and self._parse_var(), 8487 ) 8488 8489 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8490 args = self._parse_csv(lambda: self._parse_lambda()) 8491 8492 this = seq_get(args, 0) 8493 decimals = seq_get(args, 1) 8494 8495 return expr_type( 8496 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8497 ) 8498 8499 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8500 star_token = self._prev 8501 8502 if self._match_text_seq("COLUMNS", "(", advance=False): 8503 this = self._parse_function() 8504 if isinstance(this, exp.Columns): 8505 this.set("unpack", True) 8506 return this 8507 8508 return self.expression( 8509 exp.Star, 8510 **{ # type: ignore 8511 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8512 "replace": self._parse_star_op("REPLACE"), 8513 "rename": self._parse_star_op("RENAME"), 8514 }, 8515 ).update_positions(star_token) 8516 8517 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8518 privilege_parts = [] 8519 8520 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8521 # (end of privilege list) or L_PAREN (start of column list) are met 8522 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8523 privilege_parts.append(self._curr.text.upper()) 8524 self._advance() 8525 8526 this = exp.var(" ".join(privilege_parts)) 8527 expressions = ( 8528 self._parse_wrapped_csv(self._parse_column) 8529 if self._match(TokenType.L_PAREN, advance=False) 8530 else None 8531 ) 8532 8533 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8534 8535 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8536 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8537 principal = self._parse_id_var() 8538 8539 if not principal: 8540 return None 8541 8542 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8543 8544 def _parse_grant_revoke_common( 8545 self, 8546 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8547 privileges = self._parse_csv(self._parse_grant_privilege) 8548 8549 self._match(TokenType.ON) 8550 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8551 8552 # Attempt to parse the securable e.g. 
MySQL allows names 8553 # such as "foo.*", "*.*" which are not easily parseable yet 8554 securable = self._try_parse(self._parse_table_parts) 8555 8556 return privileges, kind, securable 8557 8558 def _parse_grant(self) -> exp.Grant | exp.Command: 8559 start = self._prev 8560 8561 privileges, kind, securable = self._parse_grant_revoke_common() 8562 8563 if not securable or not self._match_text_seq("TO"): 8564 return self._parse_as_command(start) 8565 8566 principals = self._parse_csv(self._parse_grant_principal) 8567 8568 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8569 8570 if self._curr: 8571 return self._parse_as_command(start) 8572 8573 return self.expression( 8574 exp.Grant, 8575 privileges=privileges, 8576 kind=kind, 8577 securable=securable, 8578 principals=principals, 8579 grant_option=grant_option, 8580 ) 8581 8582 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8583 start = self._prev 8584 8585 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8586 8587 privileges, kind, securable = self._parse_grant_revoke_common() 8588 8589 if not securable or not self._match_text_seq("FROM"): 8590 return self._parse_as_command(start) 8591 8592 principals = self._parse_csv(self._parse_grant_principal) 8593 8594 cascade = None 8595 if self._match_texts(("CASCADE", "RESTRICT")): 8596 cascade = self._prev.text.upper() 8597 8598 if self._curr: 8599 return self._parse_as_command(start) 8600 8601 return self.expression( 8602 exp.Revoke, 8603 privileges=privileges, 8604 kind=kind, 8605 securable=securable, 8606 principals=principals, 8607 grant_option=grant_option, 8608 cascade=cascade, 8609 ) 8610 8611 def _parse_overlay(self) -> exp.Overlay: 8612 return self.expression( 8613 exp.Overlay, 8614 **{ # type: ignore 8615 "this": self._parse_bitwise(), 8616 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8617 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8618 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8619 }, 8620 ) 8621 8622 def _parse_format_name(self) -> exp.Property: 8623 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8624 # for FILE_FORMAT = <format_name> 8625 return self.expression( 8626 exp.Property, 8627 this=exp.var("FORMAT_NAME"), 8628 value=self._parse_string() or self._parse_table_parts(), 8629 ) 8630 8631 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8632 args: t.List[exp.Expression] = [] 8633 8634 if self._match(TokenType.DISTINCT): 8635 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8636 self._match(TokenType.COMMA) 8637 8638 args.extend(self._parse_csv(self._parse_assignment)) 8639 8640 return self.expression( 8641 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8642 ) 8643 8644 def _identifier_expression( 8645 self, token: t.Optional[Token] = None, **kwargs: t.Any 8646 ) -> exp.Identifier: 8647 token = token or self._prev 8648 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8649 expression.update_positions(token) 8650 return expression 8651 8652 def _build_pipe_cte( 8653 self, 8654 query: exp.Query, 8655 expressions: t.List[exp.Expression], 8656 alias_cte: t.Optional[exp.TableAlias] = None, 8657 ) -> exp.Select: 8658 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8659 if alias_cte: 8660 new_cte = alias_cte 8661 else: 8662 self._pipe_cte_counter += 1 8663 new_cte = f"__tmp{self._pipe_cte_counter}" 8664 8665 with_ = 
query.args.get("with") 8666 ctes = with_.pop() if with_ else None 8667 8668 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8669 if ctes: 8670 new_select.set("with", ctes) 8671 8672 return new_select.with_(new_cte, as_=query, copy=False) 8673 8674 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8675 select = self._parse_select(consume_pipe=False) 8676 if not select: 8677 return query 8678 8679 return self._build_pipe_cte( 8680 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8681 ) 8682 8683 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8684 limit = self._parse_limit() 8685 offset = self._parse_offset() 8686 if limit: 8687 curr_limit = query.args.get("limit", limit) 8688 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8689 query.limit(limit, copy=False) 8690 if offset: 8691 curr_offset = query.args.get("offset") 8692 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8693 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8694 8695 return query 8696 8697 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8698 this = self._parse_assignment() 8699 if self._match_text_seq("GROUP", "AND", advance=False): 8700 return this 8701 8702 this = self._parse_alias(this) 8703 8704 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8705 return self._parse_ordered(lambda: this) 8706 8707 return this 8708 8709 def _parse_pipe_syntax_aggregate_group_order_by( 8710 self, query: exp.Select, group_by_exists: bool = True 8711 ) -> exp.Select: 8712 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8713 aggregates_or_groups, orders = [], [] 8714 for element in expr: 8715 if isinstance(element, exp.Ordered): 8716 this = element.this 8717 if isinstance(this, exp.Alias): 8718 element.set("this", this.args["alias"]) 8719 orders.append(element) 8720 else: 8721 this = element 8722 aggregates_or_groups.append(this) 8723 8724 if group_by_exists: 8725 query.select(*aggregates_or_groups, copy=False).group_by( 8726 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8727 copy=False, 8728 ) 8729 else: 8730 query.select(*aggregates_or_groups, append=False, copy=False) 8731 8732 if orders: 8733 return query.order_by(*orders, append=False, copy=False) 8734 8735 return query 8736 8737 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8738 self._match_text_seq("AGGREGATE") 8739 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8740 8741 if self._match(TokenType.GROUP_BY) or ( 8742 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8743 ): 8744 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8745 8746 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8747 8748 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8749 first_setop = self.parse_set_operation(this=query) 8750 if not first_setop: 8751 return None 8752 8753 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8754 expr = self._parse_paren() 8755 return expr.assert_is(exp.Subquery).unnest() if expr else None 8756 8757 first_setop.this.pop() 8758 8759 setops = [ 8760 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8761 *self._parse_csv(_parse_and_unwrap_query), 8762 ] 8763 8764 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8765 
with_ = query.args.get("with") 8766 ctes = with_.pop() if with_ else None 8767 8768 if isinstance(first_setop, exp.Union): 8769 query = query.union(*setops, copy=False, **first_setop.args) 8770 elif isinstance(first_setop, exp.Except): 8771 query = query.except_(*setops, copy=False, **first_setop.args) 8772 else: 8773 query = query.intersect(*setops, copy=False, **first_setop.args) 8774 8775 query.set("with", ctes) 8776 8777 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8778 8779 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8780 join = self._parse_join() 8781 if not join: 8782 return None 8783 8784 if isinstance(query, exp.Select): 8785 return query.join(join, copy=False) 8786 8787 return query 8788 8789 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8790 pivots = self._parse_pivots() 8791 if not pivots: 8792 return query 8793 8794 from_ = query.args.get("from") 8795 if from_: 8796 from_.this.set("pivots", pivots) 8797 8798 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8799 8800 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8801 self._match_text_seq("EXTEND") 8802 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8803 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8804 8805 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8806 sample = self._parse_table_sample() 8807 8808 with_ = query.args.get("with") 8809 if with_: 8810 with_.expressions[-1].this.set("sample", sample) 8811 else: 8812 query.set("sample", sample) 8813 8814 return query 8815 8816 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8817 if isinstance(query, exp.Subquery): 8818 query = exp.select("*").from_(query, copy=False) 8819 8820 if not query.args.get("from"): 8821 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8822 8823 while self._match(TokenType.PIPE_GT): 8824 start = self._curr 8825 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8826 if not parser: 8827 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8828 # keywords, making it tricky to disambiguate them without lookahead. The approach 8829 # here is to try and parse a set operation and if that fails, then try to parse a 8830 # join operator. If that fails as well, then the operator is not supported. 
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)

    def _parse_json_value(self) -> exp.JSONValue:
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue,
            this=this,
            path=self.dialect.to_json_path(path),
            returning=returning,
            on_condition=self._parse_on_condition(),
        )

    def _parse_group_concat(self) -> t.Optional[exp.Expression]:
        def concat_exprs(
            node: t.Optional[exp.Expression], exprs: t.List[exp.Expression]
        ) -> exp.Expression:
            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
                concat_exprs = [
                    self.expression(exp.Concat, expressions=node.expressions, safe=True)
                ]
                node.set("expressions", concat_exprs)
                return node
            if len(exprs) == 1:
                return exprs[0]
            return self.expression(exp.Concat, expressions=exprs, safe=True)

        args = self._parse_csv(self._parse_lambda)

        if args:
            order = args[-1] if isinstance(args[-1], exp.Order) else None

            if order:
                # ORDER BY is the last (or only) expression in the list and has consumed
                # the 'expr' before it; remove 'expr' from exp.Order and add it back to args
                args[-1] = order.this
                order.set("this", concat_exprs(order.this, args))

            this = order or concat_exprs(args[0], args)
        else:
            this = None

        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None

        return self.expression(exp.GroupConcat, this=this, separator=separator)
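The GROUP_CONCAT handling above folds a trailing ORDER BY back into the aggregate and keeps the separator as a dedicated arg. A minimal usage sketch, assuming the MySQL dialect routes GROUP_CONCAT through _parse_group_concat as in recent sqlglot releases:

from sqlglot import exp, parse_one

ast = parse_one(
    "SELECT GROUP_CONCAT(DISTINCT a ORDER BY b SEPARATOR '|') FROM t",
    read="mysql",
)
gc = ast.find(exp.GroupConcat)

# The ORDER BY wraps the concatenated expression, while SEPARATOR is stored
# as its own arg instead of as a positional function argument
assert isinstance(gc.this, exp.Order)
assert gc.args["separator"].this == "|"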
32def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 33 if len(args) == 1 and args[0].is_star: 34 return exp.StarMap(this=args[0]) 35 36 keys = [] 37 values = [] 38 for i in range(0, len(args), 2): 39 keys.append(args[i]) 40 values.append(args[i + 1]) 41 42 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
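A quick sketch of build_var_map's contract, constructing the argument list by hand rather than through a dialect: even positions become keys, odd positions become values, and a lone star collapses to exp.StarMap.

from sqlglot import exp
from sqlglot.parser import build_var_map

args = [
    exp.Literal.string("a"), exp.Literal.number(1),
    exp.Literal.string("b"), exp.Literal.number(2),
]
node = build_var_map(args)

assert isinstance(node, exp.VarMap)
assert [k.this for k in node.args["keys"].expressions] == ["a", "b"]
assert isinstance(build_var_map([exp.Star()]), exp.StarMap)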
50def binary_range_parser( 51 expr_type: t.Type[exp.Expression], reverse_args: bool = False 52) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 53 def _parse_binary_range( 54 self: Parser, this: t.Optional[exp.Expression] 55 ) -> t.Optional[exp.Expression]: 56 expression = self._parse_bitwise() 57 if reverse_args: 58 this, expression = expression, this 59 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 60 61 return _parse_binary_range
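binary_range_parser is a factory: it returns a callable with the (self, this) shape that RANGE_PARSERS values expect, parsing the right-hand side via _parse_bitwise. An end-to-end sketch using the GLOB operator, which the base parser wires up exactly this way:

from sqlglot import exp, parse_one

glob = parse_one("SELECT * FROM t WHERE x GLOB 'a*'", read="sqlite").find(exp.Glob)

# The left operand becomes `this` and the pattern becomes `expression`
assert glob.this.name == "x"
assert glob.expression.this == "a*"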
64def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 65 # Default argument order is base, expression 66 this = seq_get(args, 0) 67 expression = seq_get(args, 1) 68 69 if expression: 70 if not dialect.LOG_BASE_FIRST: 71 this, expression = expression, this 72 return exp.Log(this=this, expression=expression) 73 74 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
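The swap above normalizes LOG's argument order across dialects so the AST always stores the base in `this`. A sketch, assuming Hive keeps the default LOG_BASE_FIRST = True and BigQuery sets it to False (BigQuery's LOG takes the value first):

from sqlglot import exp, parse_one

# Hive-style LOG(base, value): no swap needed
log = parse_one("SELECT LOG(10, x)", read="hive").find(exp.Log)
assert log.this == exp.Literal.number(10)

# BigQuery-style LOG(value, base): operands are swapped on the way in
log = parse_one("SELECT LOG(x, 10)", read="bigquery").find(exp.Log)
assert log.this == exp.Literal.number(10)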
94def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 95 def _builder(args: t.List, dialect: Dialect) -> E: 96 expression = expr_type( 97 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 98 ) 99 if len(args) > 2 and expr_type is exp.JSONExtract: 100 expression.set("expressions", args[2:]) 101 102 return expression 103 104 return _builder
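The important detail here is that the raw path literal is converted into a structured exp.JSONPath by dialect.to_json_path, so any target dialect can re-render it natively. A small sketch:

from sqlglot import exp, parse_one

node = parse_one("SELECT JSON_EXTRACT(doc, '$.a.b')", read="mysql").find(exp.JSONExtract)

# The second argument is no longer a plain string literal after parsing
assert isinstance(node.expression, exp.JSONPath)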
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
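The exp.Paren wrapping preserves precedence when MOD is transpiled to the infix % operator; without it, MOD(a + 1, 7) would round-trip as a + 1 % 7, which binds differently:

from sqlglot import parse_one

print(parse_one("SELECT MOD(a + 1, 7)").sql())  # SELECT (a + 1) % 7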
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
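A direct-call sketch; whether bracket_notation is actually recorded depends on the dialect's HAS_DISTINCT_ARRAY_CONSTRUCTORS flag (DuckDB is used here purely for illustration, and no particular flag value is assumed):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

arr = build_array_constructor(
    exp.Array,
    [exp.Literal.number(1), exp.Literal.number(2)],
    TokenType.L_BRACKET,
    Dialect.get_or_raise("duckdb"),
)

# None if the dialect does not distinguish ARRAY[...] from [...]
print(arr.args.get("bracket_notation"))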
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
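In the two-argument form the first argument is the target zone and the source zone falls back to the supplied default (Redshift, for example, documents UTC as its default source zone). A sketch with hand-built args:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("ts")],
    default_source_tz="UTC",
)

assert node.args["source_tz"] == exp.Literal.string("UTC")
assert node.args["target_tz"] == exp.Literal.string("America/New_York")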
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.LOCK, 536 TokenType.MERGE, 537 TokenType.NATURAL, 538 TokenType.NEXT, 539 TokenType.OFFSET, 540 TokenType.OPERATOR, 541 TokenType.ORDINALITY, 542 TokenType.OVERLAPS, 543 TokenType.OVERWRITE, 544 TokenType.PARTITION, 545 TokenType.PERCENT, 546 TokenType.PIVOT, 547 TokenType.PRAGMA, 548 TokenType.PUT, 549 TokenType.RANGE, 550 TokenType.RECURSIVE, 551 TokenType.REFERENCES, 552 TokenType.REFRESH, 553 TokenType.RENAME, 554 TokenType.REPLACE, 555 TokenType.RIGHT, 556 TokenType.ROLLUP, 557 TokenType.ROW, 558 TokenType.ROWS, 559 TokenType.SEMI, 560 TokenType.SET, 561 TokenType.SETTINGS, 562 TokenType.SHOW, 563 TokenType.TEMPORARY, 564 TokenType.TOP, 565 
TokenType.TRUE, 566 TokenType.TRUNCATE, 567 TokenType.UNIQUE, 568 TokenType.UNNEST, 569 TokenType.UNPIVOT, 570 TokenType.UPDATE, 571 TokenType.USE, 572 TokenType.VOLATILE, 573 TokenType.WINDOW, 574 *ALTERABLES, 575 *CREATABLES, 576 *SUBQUERY_PREDICATES, 577 *TYPE_TOKENS, 578 *NO_PAREN_FUNCTIONS, 579 } 580 ID_VAR_TOKENS.remove(TokenType.UNION) 581 582 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 583 TokenType.ANTI, 584 TokenType.ASOF, 585 TokenType.FULL, 586 TokenType.LEFT, 587 TokenType.LOCK, 588 TokenType.NATURAL, 589 TokenType.RIGHT, 590 TokenType.SEMI, 591 TokenType.WINDOW, 592 } 593 594 ALIAS_TOKENS = ID_VAR_TOKENS 595 596 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 597 598 ARRAY_CONSTRUCTORS = { 599 "ARRAY": exp.Array, 600 "LIST": exp.List, 601 } 602 603 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 604 605 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 606 607 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 608 609 FUNC_TOKENS = { 610 TokenType.COLLATE, 611 TokenType.COMMAND, 612 TokenType.CURRENT_DATE, 613 TokenType.CURRENT_DATETIME, 614 TokenType.CURRENT_SCHEMA, 615 TokenType.CURRENT_TIMESTAMP, 616 TokenType.CURRENT_TIME, 617 TokenType.CURRENT_USER, 618 TokenType.FILTER, 619 TokenType.FIRST, 620 TokenType.FORMAT, 621 TokenType.GET, 622 TokenType.GLOB, 623 TokenType.IDENTIFIER, 624 TokenType.INDEX, 625 TokenType.ISNULL, 626 TokenType.ILIKE, 627 TokenType.INSERT, 628 TokenType.LIKE, 629 TokenType.MERGE, 630 TokenType.NEXT, 631 TokenType.OFFSET, 632 TokenType.PRIMARY_KEY, 633 TokenType.RANGE, 634 TokenType.REPLACE, 635 TokenType.RLIKE, 636 TokenType.ROW, 637 TokenType.UNNEST, 638 TokenType.VAR, 639 TokenType.LEFT, 640 TokenType.RIGHT, 641 TokenType.SEQUENCE, 642 TokenType.DATE, 643 TokenType.DATETIME, 644 TokenType.TABLE, 645 TokenType.TIMESTAMP, 646 TokenType.TIMESTAMPTZ, 647 TokenType.TRUNCATE, 648 TokenType.UTC_DATE, 649 TokenType.UTC_TIME, 650 TokenType.UTC_TIMESTAMP, 651 TokenType.WINDOW, 652 TokenType.XOR, 653 *TYPE_TOKENS, 654 *SUBQUERY_PREDICATES, 655 } 656 657 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 658 TokenType.AND: exp.And, 659 } 660 661 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 662 TokenType.COLON_EQ: exp.PropertyEQ, 663 } 664 665 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 666 TokenType.OR: exp.Or, 667 } 668 669 EQUALITY = { 670 TokenType.EQ: exp.EQ, 671 TokenType.NEQ: exp.NEQ, 672 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 673 } 674 675 COMPARISON = { 676 TokenType.GT: exp.GT, 677 TokenType.GTE: exp.GTE, 678 TokenType.LT: exp.LT, 679 TokenType.LTE: exp.LTE, 680 } 681 682 BITWISE = { 683 TokenType.AMP: exp.BitwiseAnd, 684 TokenType.CARET: exp.BitwiseXor, 685 TokenType.PIPE: exp.BitwiseOr, 686 } 687 688 TERM = { 689 TokenType.DASH: exp.Sub, 690 TokenType.PLUS: exp.Add, 691 TokenType.MOD: exp.Mod, 692 TokenType.COLLATE: exp.Collate, 693 } 694 695 FACTOR = { 696 TokenType.DIV: exp.IntDiv, 697 TokenType.LR_ARROW: exp.Distance, 698 TokenType.SLASH: exp.Div, 699 TokenType.STAR: exp.Mul, 700 } 701 702 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 703 704 TIMES = { 705 TokenType.TIME, 706 TokenType.TIMETZ, 707 } 708 709 TIMESTAMPS = { 710 TokenType.TIMESTAMP, 711 TokenType.TIMESTAMPNTZ, 712 TokenType.TIMESTAMPTZ, 713 TokenType.TIMESTAMPLTZ, 714 *TIMES, 715 } 716 717 SET_OPERATIONS = { 718 TokenType.UNION, 719 TokenType.INTERSECT, 720 TokenType.EXCEPT, 721 } 722 723 JOIN_METHODS = { 724 TokenType.ASOF, 725 TokenType.NATURAL, 726 TokenType.POSITIONAL, 727 } 728 729 JOIN_SIDES = { 730 TokenType.LEFT, 731 
TokenType.RIGHT, 732 TokenType.FULL, 733 } 734 735 JOIN_KINDS = { 736 TokenType.ANTI, 737 TokenType.CROSS, 738 TokenType.INNER, 739 TokenType.OUTER, 740 TokenType.SEMI, 741 TokenType.STRAIGHT_JOIN, 742 } 743 744 JOIN_HINTS: t.Set[str] = set() 745 746 LAMBDAS = { 747 TokenType.ARROW: lambda self, expressions: self.expression( 748 exp.Lambda, 749 this=self._replace_lambda( 750 self._parse_assignment(), 751 expressions, 752 ), 753 expressions=expressions, 754 ), 755 TokenType.FARROW: lambda self, expressions: self.expression( 756 exp.Kwarg, 757 this=exp.var(expressions[0].name), 758 expression=self._parse_assignment(), 759 ), 760 } 761 762 COLUMN_OPERATORS = { 763 TokenType.DOT: None, 764 TokenType.DOTCOLON: lambda self, this, to: self.expression( 765 exp.JSONCast, 766 this=this, 767 to=to, 768 ), 769 TokenType.DCOLON: lambda self, this, to: self.build_cast( 770 strict=self.STRICT_CAST, this=this, to=to 771 ), 772 TokenType.ARROW: lambda self, this, path: self.expression( 773 exp.JSONExtract, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.DARROW: lambda self, this, path: self.expression( 779 exp.JSONExtractScalar, 780 this=this, 781 expression=self.dialect.to_json_path(path), 782 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 783 ), 784 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtract, 786 this=this, 787 expression=path, 788 ), 789 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 790 exp.JSONBExtractScalar, 791 this=this, 792 expression=path, 793 ), 794 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 795 exp.JSONBContains, 796 this=this, 797 expression=key, 798 ), 799 } 800 801 CAST_COLUMN_OPERATORS = { 802 TokenType.DOTCOLON, 803 TokenType.DCOLON, 804 } 805 806 EXPRESSION_PARSERS = { 807 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 808 exp.Column: lambda self: self._parse_column(), 809 exp.Condition: lambda self: self._parse_assignment(), 810 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 811 exp.Expression: lambda self: self._parse_expression(), 812 exp.From: lambda self: self._parse_from(joins=True), 813 exp.Group: lambda self: self._parse_group(), 814 exp.Having: lambda self: self._parse_having(), 815 exp.Hint: lambda self: self._parse_hint_body(), 816 exp.Identifier: lambda self: self._parse_id_var(), 817 exp.Join: lambda self: self._parse_join(), 818 exp.Lambda: lambda self: self._parse_lambda(), 819 exp.Lateral: lambda self: self._parse_lateral(), 820 exp.Limit: lambda self: self._parse_limit(), 821 exp.Offset: lambda self: self._parse_offset(), 822 exp.Order: lambda self: self._parse_order(), 823 exp.Ordered: lambda self: self._parse_ordered(), 824 exp.Properties: lambda self: self._parse_properties(), 825 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 826 exp.Qualify: lambda self: self._parse_qualify(), 827 exp.Returning: lambda self: self._parse_returning(), 828 exp.Select: lambda self: self._parse_select(), 829 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 830 exp.Table: lambda self: self._parse_table_parts(), 831 exp.TableAlias: lambda self: self._parse_table_alias(), 832 exp.Tuple: lambda self: self._parse_value(values=False), 833 exp.Whens: lambda self: self._parse_when_matched(), 834 exp.Where: lambda self: self._parse_where(), 835 exp.Window: lambda self: self._parse_named_window(), 836 exp.With: 
lambda self: self._parse_with(), 837 "JOIN_TYPE": lambda self: self._parse_join_parts(), 838 } 839 840 STATEMENT_PARSERS = { 841 TokenType.ALTER: lambda self: self._parse_alter(), 842 TokenType.ANALYZE: lambda self: self._parse_analyze(), 843 TokenType.BEGIN: lambda self: self._parse_transaction(), 844 TokenType.CACHE: lambda self: self._parse_cache(), 845 TokenType.COMMENT: lambda self: self._parse_comment(), 846 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 847 TokenType.COPY: lambda self: self._parse_copy(), 848 TokenType.CREATE: lambda self: self._parse_create(), 849 TokenType.DELETE: lambda self: self._parse_delete(), 850 TokenType.DESC: lambda self: self._parse_describe(), 851 TokenType.DESCRIBE: lambda self: self._parse_describe(), 852 TokenType.DROP: lambda self: self._parse_drop(), 853 TokenType.GRANT: lambda self: self._parse_grant(), 854 TokenType.REVOKE: lambda self: self._parse_revoke(), 855 TokenType.INSERT: lambda self: self._parse_insert(), 856 TokenType.KILL: lambda self: self._parse_kill(), 857 TokenType.LOAD: lambda self: self._parse_load(), 858 TokenType.MERGE: lambda self: self._parse_merge(), 859 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 860 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 861 TokenType.REFRESH: lambda self: self._parse_refresh(), 862 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 863 TokenType.SET: lambda self: self._parse_set(), 864 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 865 TokenType.UNCACHE: lambda self: self._parse_uncache(), 866 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 867 TokenType.UPDATE: lambda self: self._parse_update(), 868 TokenType.USE: lambda self: self._parse_use(), 869 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 870 } 871 872 UNARY_PARSERS = { 873 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 874 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 875 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 876 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 877 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 878 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 879 } 880 881 STRING_PARSERS = { 882 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 883 exp.RawString, this=token.text 884 ), 885 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 886 exp.National, this=token.text 887 ), 888 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 889 TokenType.STRING: lambda self, token: self.expression( 890 exp.Literal, this=token.text, is_string=True 891 ), 892 TokenType.UNICODE_STRING: lambda self, token: self.expression( 893 exp.UnicodeString, 894 this=token.text, 895 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 896 ), 897 } 898 899 NUMERIC_PARSERS = { 900 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 901 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 902 TokenType.HEX_STRING: lambda self, token: self.expression( 903 exp.HexString, 904 this=token.text, 905 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 906 ), 907 TokenType.NUMBER: lambda self, token: self.expression( 908 
exp.Literal, this=token.text, is_string=False 909 ), 910 } 911 912 PRIMARY_PARSERS = { 913 **STRING_PARSERS, 914 **NUMERIC_PARSERS, 915 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 916 TokenType.NULL: lambda self, _: self.expression(exp.Null), 917 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 918 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 919 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 920 TokenType.STAR: lambda self, _: self._parse_star_ops(), 921 } 922 923 PLACEHOLDER_PARSERS = { 924 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 925 TokenType.PARAMETER: lambda self: self._parse_parameter(), 926 TokenType.COLON: lambda self: ( 927 self.expression(exp.Placeholder, this=self._prev.text) 928 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 929 else None 930 ), 931 } 932 933 RANGE_PARSERS = { 934 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 935 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 936 TokenType.GLOB: binary_range_parser(exp.Glob), 937 TokenType.ILIKE: binary_range_parser(exp.ILike), 938 TokenType.IN: lambda self, this: self._parse_in(this), 939 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 940 TokenType.IS: lambda self, this: self._parse_is(this), 941 TokenType.LIKE: binary_range_parser(exp.Like), 942 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 943 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 944 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 945 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 946 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 947 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 948 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 949 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 950 } 951 952 PIPE_SYNTAX_TRANSFORM_PARSERS = { 953 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 954 "AS": lambda self, query: self._build_pipe_cte( 955 query, [exp.Star()], self._parse_table_alias() 956 ), 957 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 958 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 959 "ORDER BY": lambda self, query: query.order_by( 960 self._parse_order(), append=False, copy=False 961 ), 962 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 963 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 964 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 965 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 966 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 967 } 968 969 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 970 "ALLOWED_VALUES": lambda self: self.expression( 971 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 972 ), 973 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 974 "AUTO": lambda self: self._parse_auto_property(), 975 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 976 "BACKUP": lambda self: self.expression( 977 exp.BackupProperty, this=self._parse_var(any_token=True) 978 ), 979 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 980 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 981 "CHARACTER SET": lambda 
self, **kwargs: self._parse_character_set(**kwargs), 982 "CHECKSUM": lambda self: self._parse_checksum(), 983 "CLUSTER BY": lambda self: self._parse_cluster(), 984 "CLUSTERED": lambda self: self._parse_clustered_by(), 985 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 986 exp.CollateProperty, **kwargs 987 ), 988 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 989 "CONTAINS": lambda self: self._parse_contains_property(), 990 "COPY": lambda self: self._parse_copy_property(), 991 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 992 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 993 "DEFINER": lambda self: self._parse_definer(), 994 "DETERMINISTIC": lambda self: self.expression( 995 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 996 ), 997 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 998 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 999 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 1000 "DISTKEY": lambda self: self._parse_distkey(), 1001 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1002 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1003 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1004 "ENVIRONMENT": lambda self: self.expression( 1005 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1006 ), 1007 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1008 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1009 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1010 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1011 "FREESPACE": lambda self: self._parse_freespace(), 1012 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1013 "HEAP": lambda self: self.expression(exp.HeapProperty), 1014 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1015 "IMMUTABLE": lambda self: self.expression( 1016 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1017 ), 1018 "INHERITS": lambda self: self.expression( 1019 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1020 ), 1021 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1022 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1023 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1024 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1025 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1026 "LIKE": lambda self: self._parse_create_like(), 1027 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1028 "LOCK": lambda self: self._parse_locking(), 1029 "LOCKING": lambda self: self._parse_locking(), 1030 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1031 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1032 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1033 "MODIFIES": lambda self: self._parse_modifies_property(), 1034 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1035 "NO": lambda self: self._parse_no_property(), 1036 "ON": lambda self: self._parse_on_property(), 1037 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1038 "OUTPUT": lambda self: 
self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1039 "PARTITION": lambda self: self._parse_partitioned_of(), 1040 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1041 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1042 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1043 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1044 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1045 "READS": lambda self: self._parse_reads_property(), 1046 "REMOTE": lambda self: self._parse_remote_with_connection(), 1047 "RETURNS": lambda self: self._parse_returns(), 1048 "STRICT": lambda self: self.expression(exp.StrictProperty), 1049 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1050 "ROW": lambda self: self._parse_row(), 1051 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1052 "SAMPLE": lambda self: self.expression( 1053 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1054 ), 1055 "SECURE": lambda self: self.expression(exp.SecureProperty), 1056 "SECURITY": lambda self: self._parse_security(), 1057 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1058 "SETTINGS": lambda self: self._parse_settings_property(), 1059 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1060 "SORTKEY": lambda self: self._parse_sortkey(), 1061 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1062 "STABLE": lambda self: self.expression( 1063 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1064 ), 1065 "STORED": lambda self: self._parse_stored(), 1066 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1067 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1068 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1069 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1070 "TO": lambda self: self._parse_to_table(), 1071 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1072 "TRANSFORM": lambda self: self.expression( 1073 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1074 ), 1075 "TTL": lambda self: self._parse_ttl(), 1076 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1077 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1078 "VOLATILE": lambda self: self._parse_volatile_property(), 1079 "WITH": lambda self: self._parse_with_property(), 1080 } 1081 1082 CONSTRAINT_PARSERS = { 1083 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1084 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1085 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1086 "CHARACTER SET": lambda self: self.expression( 1087 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1088 ), 1089 "CHECK": lambda self: self.expression( 1090 exp.CheckColumnConstraint, 1091 this=self._parse_wrapped(self._parse_assignment), 1092 enforced=self._match_text_seq("ENFORCED"), 1093 ), 1094 "COLLATE": lambda self: self.expression( 1095 exp.CollateColumnConstraint, 1096 this=self._parse_identifier() or self._parse_column(), 1097 ), 1098 "COMMENT": lambda self: self.expression( 1099 exp.CommentColumnConstraint, this=self._parse_string() 1100 ), 1101 "COMPRESS": lambda self: self._parse_compress(), 1102 "CLUSTERED": lambda self: self.expression( 1103 exp.ClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 1104 ), 1105 "NONCLUSTERED": lambda self: self.expression( 1106 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1107 ), 1108 "DEFAULT": lambda self: self.expression( 1109 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1110 ), 1111 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1112 "EPHEMERAL": lambda self: self.expression( 1113 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1114 ), 1115 "EXCLUDE": lambda self: self.expression( 1116 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1117 ), 1118 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1119 "FORMAT": lambda self: self.expression( 1120 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1121 ), 1122 "GENERATED": lambda self: self._parse_generated_as_identity(), 1123 "IDENTITY": lambda self: self._parse_auto_increment(), 1124 "INLINE": lambda self: self._parse_inline(), 1125 "LIKE": lambda self: self._parse_create_like(), 1126 "NOT": lambda self: self._parse_not_constraint(), 1127 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1128 "ON": lambda self: ( 1129 self._match(TokenType.UPDATE) 1130 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1131 ) 1132 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1133 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1134 "PERIOD": lambda self: self._parse_period_for_system_time(), 1135 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1136 "REFERENCES": lambda self: self._parse_references(match=False), 1137 "TITLE": lambda self: self.expression( 1138 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1139 ), 1140 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1141 "UNIQUE": lambda self: self._parse_unique(), 1142 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1143 "WATERMARK": lambda self: self.expression( 1144 exp.WatermarkColumnConstraint, 1145 this=self._match(TokenType.FOR) and self._parse_column(), 1146 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1147 ), 1148 "WITH": lambda self: self.expression( 1149 exp.Properties, expressions=self._parse_wrapped_properties() 1150 ), 1151 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1152 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1153 } 1154 1155 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1156 if not self._match(TokenType.L_PAREN, advance=False): 1157 # Partitioning by bucket or truncate follows the syntax: 1158 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1159 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1160 self._retreat(self._index - 1) 1161 return None 1162 1163 klass = ( 1164 exp.PartitionedByBucket 1165 if self._prev.text.upper() == "BUCKET" 1166 else exp.PartitionByTruncate 1167 ) 1168 1169 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1170 this, expression = seq_get(args, 0), seq_get(args, 1) 1171 1172 if isinstance(this, exp.Literal): 1173 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1174 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1175 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1176 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1177 # 1178 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1179 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1180 this, expression = expression, this 1181 1182 return self.expression(klass, this=this, expression=expression) 1183 1184 ALTER_PARSERS = { 1185 "ADD": lambda self: self._parse_alter_table_add(), 1186 "AS": lambda self: self._parse_select(), 1187 "ALTER": lambda self: self._parse_alter_table_alter(), 1188 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1189 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1190 "DROP": lambda self: self._parse_alter_table_drop(), 1191 "RENAME": lambda self: self._parse_alter_table_rename(), 1192 "SET": lambda self: self._parse_alter_table_set(), 1193 "SWAP": lambda self: self.expression( 1194 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1195 ), 1196 } 1197 1198 ALTER_ALTER_PARSERS = { 1199 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1200 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1201 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1202 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1203 } 1204 1205 SCHEMA_UNNAMED_CONSTRAINTS = { 1206 "CHECK", 1207 "EXCLUDE", 1208 "FOREIGN KEY", 1209 "LIKE", 1210 "PERIOD", 1211 "PRIMARY KEY", 1212 "UNIQUE", 1213 "WATERMARK", 1214 "BUCKET", 1215 "TRUNCATE", 1216 } 1217 1218 NO_PAREN_FUNCTION_PARSERS = { 1219 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1220 "CASE": lambda self: self._parse_case(), 1221 "CONNECT_BY_ROOT": lambda self: self.expression( 1222 exp.ConnectByRoot, this=self._parse_column() 1223 ), 1224 "IF": lambda self: self._parse_if(), 1225 } 1226 1227 INVALID_FUNC_NAME_TOKENS = { 1228 TokenType.IDENTIFIER, 1229 TokenType.STRING, 1230 } 1231 1232 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1233 1234 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1235 1236 FUNCTION_PARSERS = { 1237 **{ 1238 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1239 }, 1240 **{ 1241 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1242 }, 1243 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1244 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1245 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1246 "DECODE": lambda self: self._parse_decode(), 1247 "EXTRACT": lambda self: self._parse_extract(), 1248 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1249 "GAP_FILL": lambda self: self._parse_gap_fill(), 1250 "JSON_OBJECT": lambda self: self._parse_json_object(), 1251 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1252 "JSON_TABLE": lambda self: self._parse_json_table(), 1253 "MATCH": lambda self: self._parse_match_against(), 1254 "NORMALIZE": lambda self: self._parse_normalize(), 1255 "OPENJSON": lambda self: self._parse_open_json(), 1256 "OVERLAY": lambda self: self._parse_overlay(), 1257 "POSITION": lambda self: self._parse_position(), 1258 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1259 "STRING_AGG": lambda self: self._parse_string_agg(), 1260 "SUBSTRING": lambda self: self._parse_substring(), 1261 "TRIM": lambda self: self._parse_trim(), 1262 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1263 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1264 "XMLELEMENT": lambda self: self.expression( 1265 exp.XMLElement, 1266 this=self._match_text_seq("NAME") and self._parse_id_var(), 1267 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1268 ), 1269 "XMLTABLE": lambda self: self._parse_xml_table(), 1270 } 1271 1272 QUERY_MODIFIER_PARSERS = { 1273 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1274 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1275 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1276 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1277 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1278 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1279 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1280 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1281 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1282 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1283 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1284 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1285 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1286 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1287 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1288 TokenType.CLUSTER_BY: lambda self: ( 1289 "cluster", 1290 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1291 ), 1292 TokenType.DISTRIBUTE_BY: lambda self: ( 1293 "distribute", 1294 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1295 ), 1296 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1297 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1298 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1299 } 1300 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1301 1302 SET_PARSERS = { 1303 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1304 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1305 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1306 "TRANSACTION": lambda self: self._parse_set_transaction(), 1307 } 1308 1309 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1310 1311 TYPE_LITERAL_PARSERS = { 1312 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1313 } 1314 1315 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }
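
    # Editor's example (not part of the library): OPTIONS_TYPE values are the
    # keyword sequences that may follow the key, so the table above accepts e.g.
    # ISOLATION LEVEL READ COMMITTED. A minimal sketch, assuming sqlglot is installed:
    #
    #     import sqlglot
    #
    #     stmt = sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
    #     stmt.sql()  # round-trips via SET_PARSERS["TRANSACTION"] / _parse_set_transaction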
    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with the literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False
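
    # Editor's example (not part of the library): dialects flip these class-level
    # flags by nesting a Parser subclass, which the Dialect machinery picks up as
    # parser_class. A minimal sketch with a hypothetical dialect, assuming sqlglot
    # is installed:
    #
    #     import sqlglot
    #     from sqlglot import exp, parser
    #     from sqlglot.dialects.dialect import Dialect
    #
    #     class LnDialect(Dialect):  # hypothetical dialect, for illustration only
    #         class Parser(parser.Parser):
    #             LOG_DEFAULTS_TO_LN = True  # single-arg LOG(x) should parse as exp.Ln
    #
    #     assert sqlglot.parse_one("SELECT LOG(x)", read=LnDialect).find(exp.Ln)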
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
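
    # Editor's example (not part of the library): parse() consumes pre-tokenized
    # input and returns one syntax tree per statement. A minimal sketch, assuming
    # sqlglot is installed:
    #
    #     from sqlglot.dialects.dialect import Dialect
    #
    #     dialect = Dialect.get_or_raise("duckdb")
    #     sql = "SELECT 1; SELECT 2"
    #     trees = dialect.parser().parse(dialect.tokenize(sql), sql)
    #     assert len(trees) == 2  # statements are split on semicolons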
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
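
    # Editor's example (not part of the library): with the default IMMEDIATE error
    # level, raise_error surfaces a ParseError carrying structured details. A
    # minimal sketch, assuming sqlglot is installed:
    #
    #     import sqlglot
    #     from sqlglot.errors import ParseError
    #
    #     try:
    #         sqlglot.parse_one("SELECT 1 +")
    #     except ParseError as e:
    #         first = e.errors[0]  # dict with description, line, col, highlight, ...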
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )
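
    # Editor's example (not part of the library): COMMENT ON statements are parsed
    # into exp.Comment, falling back to exp.Command for unknown object kinds. A
    # minimal sketch, assuming sqlglot is installed:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("COMMENT ON TABLE db.t IS 'user facts'")
    #     assert isinstance(node, exp.Comment) and node.args["kind"] == "TABLE"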
    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
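
    # Editor's example (not part of the library): DROP statements carry their
    # trailing options as flags on exp.Drop. A minimal sketch, assuming sqlglot
    # is installed:
    #
    #     import sqlglot
    #
    #     d = sqlglot.parse_one("DROP TABLE IF EXISTS db.t CASCADE")
    #     assert d.args["exists"] is True and d.args["cascade"] is True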
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()
            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
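
    # Editor's example (not part of the library): the pieces gathered above end up
    # as args on exp.Create. A minimal sketch, assuming sqlglot is installed:
    #
    #     import sqlglot
    #
    #     c = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #     assert c.args["replace"] is True and c.kind == "VIEW"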
self._match_texts(("MIN", "MINIMUM")), 2228 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2229 } 2230 2231 if self._match_texts(self.PROPERTY_PARSERS): 2232 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2233 try: 2234 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2235 except TypeError: 2236 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2237 2238 return None 2239 2240 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2241 return self._parse_wrapped_csv(self._parse_property) 2242 2243 def _parse_property(self) -> t.Optional[exp.Expression]: 2244 if self._match_texts(self.PROPERTY_PARSERS): 2245 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2246 2247 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2248 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2249 2250 if self._match_text_seq("COMPOUND", "SORTKEY"): 2251 return self._parse_sortkey(compound=True) 2252 2253 if self._match_text_seq("SQL", "SECURITY"): 2254 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2255 2256 index = self._index 2257 2258 seq_props = self._parse_sequence_properties() 2259 if seq_props: 2260 return seq_props 2261 2262 self._retreat(index) 2263 key = self._parse_column() 2264 2265 if not self._match(TokenType.EQ): 2266 self._retreat(index) 2267 return None 2268 2269 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2270 if isinstance(key, exp.Column): 2271 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2272 2273 value = self._parse_bitwise() or self._parse_var(any_token=True) 2274 2275 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2276 if isinstance(value, exp.Column): 2277 value = exp.var(value.name) 2278 2279 return self.expression(exp.Property, this=key, value=value) 2280 2281 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2282 if self._match_text_seq("BY"): 2283 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2284 2285 self._match(TokenType.ALIAS) 2286 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2287 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2288 2289 return self.expression( 2290 exp.FileFormatProperty, 2291 this=( 2292 self.expression( 2293 exp.InputOutputFormat, 2294 input_format=input_format, 2295 output_format=output_format, 2296 ) 2297 if input_format or output_format 2298 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2299 ), 2300 hive_format=True, 2301 ) 2302 2303 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2304 field = self._parse_field() 2305 if isinstance(field, exp.Identifier) and not field.quoted: 2306 field = exp.var(field) 2307 2308 return field 2309 2310 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2311 self._match(TokenType.EQ) 2312 self._match(TokenType.ALIAS) 2313 2314 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2315 2316 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2317 properties = [] 2318 while True: 2319 if before: 2320 prop = self._parse_property_before() 2321 else: 2322 prop = self._parse_property() 2323 if not prop: 2324 break 2325 for p in ensure_list(prop): 2326 properties.append(p) 
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))
    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )
    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
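
    # Editor's example (not part of the library): Hive-style PARTITIONED BY schemas
    # parse into exp.PartitionedByProperty. A minimal sketch, assuming sqlglot is
    # installed:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     c = sqlglot.parse_one("CREATE TABLE t (x INT) PARTITIONED BY (y STRING)", read="hive")
    #     assert c.find(exp.PartitionedByProperty)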
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )
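
    # Editor's example (not part of the library): DESCRIBE accepts an optional
    # style keyword from DESCRIBE_STYLES. A minimal sketch, assuming sqlglot is
    # installed:
    #
    #     import sqlglot
    #
    #     d = sqlglot.parse_one("DESCRIBE EXTENDED db.t")
    #     assert d.args.get("style") == "EXTENDED"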
    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )
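
    # Editor's example (not part of the library): the clauses matched above land in
    # the args of exp.Insert. A minimal sketch, assuming sqlglot is installed:
    #
    #     import sqlglot
    #
    #     i = sqlglot.parse_one("INSERT INTO t (a, b) VALUES (1, 2) RETURNING a", read="postgres")
    #     assert i.args.get("returning") is not None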
serde_properties=serde_properties 3065 ) 3066 3067 self._match_text_seq("DELIMITED") 3068 3069 kwargs = {} 3070 3071 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3072 kwargs["fields"] = self._parse_string() 3073 if self._match_text_seq("ESCAPED", "BY"): 3074 kwargs["escaped"] = self._parse_string() 3075 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3076 kwargs["collection_items"] = self._parse_string() 3077 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3078 kwargs["map_keys"] = self._parse_string() 3079 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3080 kwargs["lines"] = self._parse_string() 3081 if self._match_text_seq("NULL", "DEFINED", "AS"): 3082 kwargs["null"] = self._parse_string() 3083 3084 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3085 3086 def _parse_load(self) -> exp.LoadData | exp.Command: 3087 if self._match_text_seq("DATA"): 3088 local = self._match_text_seq("LOCAL") 3089 self._match_text_seq("INPATH") 3090 inpath = self._parse_string() 3091 overwrite = self._match(TokenType.OVERWRITE) 3092 self._match_pair(TokenType.INTO, TokenType.TABLE) 3093 3094 return self.expression( 3095 exp.LoadData, 3096 this=self._parse_table(schema=True), 3097 local=local, 3098 overwrite=overwrite, 3099 inpath=inpath, 3100 partition=self._parse_partition(), 3101 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3102 serde=self._match_text_seq("SERDE") and self._parse_string(), 3103 ) 3104 return self._parse_as_command(self._prev) 3105 3106 def _parse_delete(self) -> exp.Delete: 3107 # This handles MySQL's "Multiple-Table Syntax" 3108 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3109 tables = None 3110 if not self._match(TokenType.FROM, advance=False): 3111 tables = self._parse_csv(self._parse_table) or None 3112 3113 returning = self._parse_returning() 3114 3115 return self.expression( 3116 exp.Delete, 3117 tables=tables, 3118 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3119 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3120 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3121 where=self._parse_where(), 3122 returning=returning or self._parse_returning(), 3123 limit=self._parse_limit(), 3124 ) 3125 3126 def _parse_update(self) -> exp.Update: 3127 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3128 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3129 returning = self._parse_returning() 3130 return self.expression( 3131 exp.Update, 3132 **{ # type: ignore 3133 "this": this, 3134 "expressions": expressions, 3135 "from": self._parse_from(joins=True), 3136 "where": self._parse_where(), 3137 "returning": returning or self._parse_returning(), 3138 "order": self._parse_order(), 3139 "limit": self._parse_limit(), 3140 }, 3141 ) 3142 3143 def _parse_use(self) -> exp.Use: 3144 return self.expression( 3145 exp.Use, 3146 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3147 this=self._parse_table(schema=False), 3148 ) 3149 3150 def _parse_uncache(self) -> exp.Uncache: 3151 if not self._match(TokenType.TABLE): 3152 self.raise_error("Expecting TABLE after UNCACHE") 3153 3154 return self.expression( 3155 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3156 ) 3157 3158 def _parse_cache(self) -> exp.Cache: 3159 lazy = self._match_text_seq("LAZY") 3160 self._match(TokenType.TABLE) 3161 table = 
self._parse_table(schema=True)
3162
3163         options = []
3164         if self._match_text_seq("OPTIONS"):
3165             self._match_l_paren()
3166             k = self._parse_string()
3167             self._match(TokenType.EQ)
3168             v = self._parse_string()
3169             options = [k, v]
3170             self._match_r_paren()
3171
3172         self._match(TokenType.ALIAS)
3173         return self.expression(
3174             exp.Cache,
3175             this=table,
3176             lazy=lazy,
3177             options=options,
3178             expression=self._parse_select(nested=True),
3179         )
3180
3181     def _parse_partition(self) -> t.Optional[exp.Partition]:
3182         if not self._match_texts(self.PARTITION_KEYWORDS):
3183             return None
3184
3185         return self.expression(
3186             exp.Partition,
3187             subpartition=self._prev.text.upper() == "SUBPARTITION",
3188             expressions=self._parse_wrapped_csv(self._parse_assignment),
3189         )
3190
3191     def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
3192         def _parse_value_expression() -> t.Optional[exp.Expression]:
3193             if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
3194                 return exp.var(self._prev.text.upper())
3195             return self._parse_expression()
3196
3197         if self._match(TokenType.L_PAREN):
3198             expressions = self._parse_csv(_parse_value_expression)
3199             self._match_r_paren()
3200             return self.expression(exp.Tuple, expressions=expressions)
3201
3202         # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
3203         expression = self._parse_expression()
3204         if expression:
3205             return self.expression(exp.Tuple, expressions=[expression])
3206         return None
3207
3208     def _parse_projections(self) -> t.List[exp.Expression]:
3209         return self._parse_expressions()
3210
3211     def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
3212         if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
3213             this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
3214                 is_unpivot=self._prev.token_type == TokenType.UNPIVOT
3215             )
3216         elif self._match(TokenType.FROM):
3217             from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
3218             # Support parentheses for duckdb FROM-first syntax
3219             select = self._parse_select(from_=from_)
3220             if select:
3221                 if not select.args.get("from"):
3222                     select.set("from", from_)
3223                 this = select
3224             else:
3225                 this = exp.select("*").from_(t.cast(exp.From, from_))
3226         else:
3227             this = (
3228                 self._parse_table(consume_pipe=True)
3229                 if table
3230                 else self._parse_select(nested=True, parse_set_operation=False)
3231             )
3232
3233         # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
3234         # in case a modifier (e.g.
join) is following 3235 if table and isinstance(this, exp.Values) and this.alias: 3236 alias = this.args["alias"].pop() 3237 this = exp.Table(this=this, alias=alias) 3238 3239 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3240 3241 return this 3242 3243 def _parse_select( 3244 self, 3245 nested: bool = False, 3246 table: bool = False, 3247 parse_subquery_alias: bool = True, 3248 parse_set_operation: bool = True, 3249 consume_pipe: bool = True, 3250 from_: t.Optional[exp.From] = None, 3251 ) -> t.Optional[exp.Expression]: 3252 query = self._parse_select_query( 3253 nested=nested, 3254 table=table, 3255 parse_subquery_alias=parse_subquery_alias, 3256 parse_set_operation=parse_set_operation, 3257 ) 3258 3259 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3260 if not query and from_: 3261 query = exp.select("*").from_(from_) 3262 if isinstance(query, exp.Query): 3263 query = self._parse_pipe_syntax_query(query) 3264 query = query.subquery(copy=False) if query and table else query 3265 3266 return query 3267 3268 def _parse_select_query( 3269 self, 3270 nested: bool = False, 3271 table: bool = False, 3272 parse_subquery_alias: bool = True, 3273 parse_set_operation: bool = True, 3274 ) -> t.Optional[exp.Expression]: 3275 cte = self._parse_with() 3276 3277 if cte: 3278 this = self._parse_statement() 3279 3280 if not this: 3281 self.raise_error("Failed to parse any statement following CTE") 3282 return cte 3283 3284 if "with" in this.arg_types: 3285 this.set("with", cte) 3286 else: 3287 self.raise_error(f"{this.key} does not support CTE") 3288 this = cte 3289 3290 return this 3291 3292 # duckdb supports leading with FROM x 3293 from_ = ( 3294 self._parse_from(consume_pipe=True) 3295 if self._match(TokenType.FROM, advance=False) 3296 else None 3297 ) 3298 3299 if self._match(TokenType.SELECT): 3300 comments = self._prev_comments 3301 3302 hint = self._parse_hint() 3303 3304 if self._next and not self._next.token_type == TokenType.DOT: 3305 all_ = self._match(TokenType.ALL) 3306 distinct = self._match_set(self.DISTINCT_TOKENS) 3307 else: 3308 all_, distinct = None, None 3309 3310 kind = ( 3311 self._match(TokenType.ALIAS) 3312 and self._match_texts(("STRUCT", "VALUE")) 3313 and self._prev.text.upper() 3314 ) 3315 3316 if distinct: 3317 distinct = self.expression( 3318 exp.Distinct, 3319 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3320 ) 3321 3322 if all_ and distinct: 3323 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3324 3325 operation_modifiers = [] 3326 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3327 operation_modifiers.append(exp.var(self._prev.text.upper())) 3328 3329 limit = self._parse_limit(top=True) 3330 projections = self._parse_projections() 3331 3332 this = self.expression( 3333 exp.Select, 3334 kind=kind, 3335 hint=hint, 3336 distinct=distinct, 3337 expressions=projections, 3338 limit=limit, 3339 operation_modifiers=operation_modifiers or None, 3340 ) 3341 this.comments = comments 3342 3343 into = self._parse_into() 3344 if into: 3345 this.set("into", into) 3346 3347 if not from_: 3348 from_ = self._parse_from() 3349 3350 if from_: 3351 this.set("from", from_) 3352 3353 this = self._parse_query_modifiers(this) 3354 elif (table or nested) and self._match(TokenType.L_PAREN): 3355 this = self._parse_wrapped_select(table=table) 3356 3357 # We return early here so that the UNION isn't attached to the subquery by the 3358 # following call to _parse_set_operations, but 
instead becomes the parent node 3359 self._match_r_paren() 3360 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3361 elif self._match(TokenType.VALUES, advance=False): 3362 this = self._parse_derived_table_values() 3363 elif from_: 3364 this = exp.select("*").from_(from_.this, copy=False) 3365 elif self._match(TokenType.SUMMARIZE): 3366 table = self._match(TokenType.TABLE) 3367 this = self._parse_select() or self._parse_string() or self._parse_table() 3368 return self.expression(exp.Summarize, this=this, table=table) 3369 elif self._match(TokenType.DESCRIBE): 3370 this = self._parse_describe() 3371 elif self._match_text_seq("STREAM"): 3372 this = self._parse_function() 3373 if this: 3374 this = self.expression(exp.Stream, this=this) 3375 else: 3376 self._retreat(self._index - 1) 3377 else: 3378 this = None 3379 3380 return self._parse_set_operations(this) if parse_set_operation else this 3381 3382 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3383 self._match_text_seq("SEARCH") 3384 3385 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3386 3387 if not kind: 3388 return None 3389 3390 self._match_text_seq("FIRST", "BY") 3391 3392 return self.expression( 3393 exp.RecursiveWithSearch, 3394 kind=kind, 3395 this=self._parse_id_var(), 3396 expression=self._match_text_seq("SET") and self._parse_id_var(), 3397 using=self._match_text_seq("USING") and self._parse_id_var(), 3398 ) 3399 3400 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3401 if not skip_with_token and not self._match(TokenType.WITH): 3402 return None 3403 3404 comments = self._prev_comments 3405 recursive = self._match(TokenType.RECURSIVE) 3406 3407 last_comments = None 3408 expressions = [] 3409 while True: 3410 cte = self._parse_cte() 3411 if isinstance(cte, exp.CTE): 3412 expressions.append(cte) 3413 if last_comments: 3414 cte.add_comments(last_comments) 3415 3416 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3417 break 3418 else: 3419 self._match(TokenType.WITH) 3420 3421 last_comments = self._prev_comments 3422 3423 return self.expression( 3424 exp.With, 3425 comments=comments, 3426 expressions=expressions, 3427 recursive=recursive, 3428 search=self._parse_recursive_with_search(), 3429 ) 3430 3431 def _parse_cte(self) -> t.Optional[exp.CTE]: 3432 index = self._index 3433 3434 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3435 if not alias or not alias.this: 3436 self.raise_error("Expected CTE to have alias") 3437 3438 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3439 self._retreat(index) 3440 return None 3441 3442 comments = self._prev_comments 3443 3444 if self._match_text_seq("NOT", "MATERIALIZED"): 3445 materialized = False 3446 elif self._match_text_seq("MATERIALIZED"): 3447 materialized = True 3448 else: 3449 materialized = None 3450 3451 cte = self.expression( 3452 exp.CTE, 3453 this=self._parse_wrapped(self._parse_statement), 3454 alias=alias, 3455 materialized=materialized, 3456 comments=comments, 3457 ) 3458 3459 values = cte.this 3460 if isinstance(values, exp.Values): 3461 if values.alias: 3462 cte.set("this", exp.select("*").from_(values)) 3463 else: 3464 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3465 3466 return cte 3467 3468 def _parse_table_alias( 3469 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3470 ) -> t.Optional[exp.TableAlias]: 3471 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3472 # so this section tries to parse the clause version and if it fails, it treats the token 3473 # as an identifier (alias) 3474 if self._can_parse_limit_or_offset(): 3475 return None 3476 3477 any_token = self._match(TokenType.ALIAS) 3478 alias = ( 3479 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3480 or self._parse_string_as_identifier() 3481 ) 3482 3483 index = self._index 3484 if self._match(TokenType.L_PAREN): 3485 columns = self._parse_csv(self._parse_function_parameter) 3486 self._match_r_paren() if columns else self._retreat(index) 3487 else: 3488 columns = None 3489 3490 if not alias and not columns: 3491 return None 3492 3493 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3494 3495 # We bubble up comments from the Identifier to the TableAlias 3496 if isinstance(alias, exp.Identifier): 3497 table_alias.add_comments(alias.pop_comments()) 3498 3499 return table_alias 3500 3501 def _parse_subquery( 3502 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3503 ) -> t.Optional[exp.Subquery]: 3504 if not this: 3505 return None 3506 3507 return self.expression( 3508 exp.Subquery, 3509 this=this, 3510 pivots=self._parse_pivots(), 3511 alias=self._parse_table_alias() if parse_alias else None, 3512 sample=self._parse_table_sample(), 3513 ) 3514 3515 def _implicit_unnests_to_explicit(self, this: E) -> E: 3516 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3517 3518 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3519 for i, join in enumerate(this.args.get("joins") or []): 3520 table = join.this 3521 normalized_table = table.copy() 3522 normalized_table.meta["maybe_column"] = True 3523 normalized_table = _norm(normalized_table, dialect=self.dialect) 3524 3525 if isinstance(table, exp.Table) and not join.args.get("on"): 3526 if normalized_table.parts[0].name in refs: 3527 table_as_column = table.to_column() 3528 unnest = exp.Unnest(expressions=[table_as_column]) 3529 3530 # Table.to_column creates a parent Alias node that we want to convert to 3531 # a TableAlias and attach to the Unnest, so it matches the parser's output 3532 if isinstance(table.args.get("alias"), exp.TableAlias): 3533 table_as_column.replace(table_as_column.this) 3534 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3535 3536 table.replace(unnest) 3537 3538 refs.add(normalized_table.alias_or_name) 3539 3540 return this 3541 3542 def _parse_query_modifiers( 3543 self, this: t.Optional[exp.Expression] 3544 ) -> t.Optional[exp.Expression]: 3545 if isinstance(this, self.MODIFIABLES): 3546 for join in self._parse_joins(): 3547 this.append("joins", join) 3548 for lateral in iter(self._parse_lateral, None): 3549 this.append("laterals", lateral) 3550 3551 while True: 3552 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3553 modifier_token = self._curr 3554 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3555 key, expression = parser(self) 3556 3557 if expression: 3558 if this.args.get(key): 3559 self.raise_error( 3560 f"Found multiple '{modifier_token.text.upper()}' clauses", 3561 token=modifier_token, 3562 ) 3563 3564 this.set(key, expression) 3565 if key == "limit": 3566 offset = expression.args.pop("offset", None) 3567 3568 if offset: 3569 offset = exp.Offset(expression=offset) 3570 this.set("offset", offset) 3571 3572 limit_by_expressions = expression.expressions 3573 
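                        # Editor's note: _parse_limit_by stashes ClickHouse-style
                        # "LIMIT ... BY expr" expressions on the Limit node; when a
                        # comma offset ("LIMIT m, n") is split out here, that BY list
                        # is moved onto the new Offset node below so the clause stays
                        # with the node that ends up rendering it.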
expression.set("expressions", None) 3574 offset.set("expressions", limit_by_expressions) 3575 continue 3576 break 3577 3578 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3579 this = self._implicit_unnests_to_explicit(this) 3580 3581 return this 3582 3583 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3584 start = self._curr 3585 while self._curr: 3586 self._advance() 3587 3588 end = self._tokens[self._index - 1] 3589 return exp.Hint(expressions=[self._find_sql(start, end)]) 3590 3591 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3592 return self._parse_function_call() 3593 3594 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3595 start_index = self._index 3596 should_fallback_to_string = False 3597 3598 hints = [] 3599 try: 3600 for hint in iter( 3601 lambda: self._parse_csv( 3602 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3603 ), 3604 [], 3605 ): 3606 hints.extend(hint) 3607 except ParseError: 3608 should_fallback_to_string = True 3609 3610 if should_fallback_to_string or self._curr: 3611 self._retreat(start_index) 3612 return self._parse_hint_fallback_to_string() 3613 3614 return self.expression(exp.Hint, expressions=hints) 3615 3616 def _parse_hint(self) -> t.Optional[exp.Hint]: 3617 if self._match(TokenType.HINT) and self._prev_comments: 3618 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3619 3620 return None 3621 3622 def _parse_into(self) -> t.Optional[exp.Into]: 3623 if not self._match(TokenType.INTO): 3624 return None 3625 3626 temp = self._match(TokenType.TEMPORARY) 3627 unlogged = self._match_text_seq("UNLOGGED") 3628 self._match(TokenType.TABLE) 3629 3630 return self.expression( 3631 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3632 ) 3633 3634 def _parse_from( 3635 self, 3636 joins: bool = False, 3637 skip_from_token: bool = False, 3638 consume_pipe: bool = False, 3639 ) -> t.Optional[exp.From]: 3640 if not skip_from_token and not self._match(TokenType.FROM): 3641 return None 3642 3643 return self.expression( 3644 exp.From, 3645 comments=self._prev_comments, 3646 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3647 ) 3648 3649 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3650 return self.expression( 3651 exp.MatchRecognizeMeasure, 3652 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3653 this=self._parse_expression(), 3654 ) 3655 3656 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3657 if not self._match(TokenType.MATCH_RECOGNIZE): 3658 return None 3659 3660 self._match_l_paren() 3661 3662 partition = self._parse_partition_by() 3663 order = self._parse_order() 3664 3665 measures = ( 3666 self._parse_csv(self._parse_match_recognize_measure) 3667 if self._match_text_seq("MEASURES") 3668 else None 3669 ) 3670 3671 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3672 rows = exp.var("ONE ROW PER MATCH") 3673 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3674 text = "ALL ROWS PER MATCH" 3675 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3676 text += " SHOW EMPTY MATCHES" 3677 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3678 text += " OMIT EMPTY MATCHES" 3679 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3680 text += " WITH UNMATCHED ROWS" 3681 rows = exp.var(text) 3682 else: 3683 rows = None 3684 3685 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3686 text = "AFTER 
MATCH SKIP" 3687 if self._match_text_seq("PAST", "LAST", "ROW"): 3688 text += " PAST LAST ROW" 3689 elif self._match_text_seq("TO", "NEXT", "ROW"): 3690 text += " TO NEXT ROW" 3691 elif self._match_text_seq("TO", "FIRST"): 3692 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3693 elif self._match_text_seq("TO", "LAST"): 3694 text += f" TO LAST {self._advance_any().text}" # type: ignore 3695 after = exp.var(text) 3696 else: 3697 after = None 3698 3699 if self._match_text_seq("PATTERN"): 3700 self._match_l_paren() 3701 3702 if not self._curr: 3703 self.raise_error("Expecting )", self._curr) 3704 3705 paren = 1 3706 start = self._curr 3707 3708 while self._curr and paren > 0: 3709 if self._curr.token_type == TokenType.L_PAREN: 3710 paren += 1 3711 if self._curr.token_type == TokenType.R_PAREN: 3712 paren -= 1 3713 3714 end = self._prev 3715 self._advance() 3716 3717 if paren > 0: 3718 self.raise_error("Expecting )", self._curr) 3719 3720 pattern = exp.var(self._find_sql(start, end)) 3721 else: 3722 pattern = None 3723 3724 define = ( 3725 self._parse_csv(self._parse_name_as_expression) 3726 if self._match_text_seq("DEFINE") 3727 else None 3728 ) 3729 3730 self._match_r_paren() 3731 3732 return self.expression( 3733 exp.MatchRecognize, 3734 partition_by=partition, 3735 order=order, 3736 measures=measures, 3737 rows=rows, 3738 after=after, 3739 pattern=pattern, 3740 define=define, 3741 alias=self._parse_table_alias(), 3742 ) 3743 3744 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3745 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3746 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3747 cross_apply = False 3748 3749 if cross_apply is not None: 3750 this = self._parse_select(table=True) 3751 view = None 3752 outer = None 3753 elif self._match(TokenType.LATERAL): 3754 this = self._parse_select(table=True) 3755 view = self._match(TokenType.VIEW) 3756 outer = self._match(TokenType.OUTER) 3757 else: 3758 return None 3759 3760 if not this: 3761 this = ( 3762 self._parse_unnest() 3763 or self._parse_function() 3764 or self._parse_id_var(any_token=False) 3765 ) 3766 3767 while self._match(TokenType.DOT): 3768 this = exp.Dot( 3769 this=this, 3770 expression=self._parse_function() or self._parse_id_var(any_token=False), 3771 ) 3772 3773 ordinality: t.Optional[bool] = None 3774 3775 if view: 3776 table = self._parse_id_var(any_token=False) 3777 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3778 table_alias: t.Optional[exp.TableAlias] = self.expression( 3779 exp.TableAlias, this=table, columns=columns 3780 ) 3781 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3782 # We move the alias from the lateral's child node to the lateral itself 3783 table_alias = this.args["alias"].pop() 3784 else: 3785 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3786 table_alias = self._parse_table_alias() 3787 3788 return self.expression( 3789 exp.Lateral, 3790 this=this, 3791 view=view, 3792 outer=outer, 3793 alias=table_alias, 3794 cross_apply=cross_apply, 3795 ordinality=ordinality, 3796 ) 3797 3798 def _parse_join_parts( 3799 self, 3800 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3801 return ( 3802 self._match_set(self.JOIN_METHODS) and self._prev, 3803 self._match_set(self.JOIN_SIDES) and self._prev, 3804 self._match_set(self.JOIN_KINDS) and self._prev, 3805 ) 3806 3807 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3808 def 
_parse_column_as_identifier() -> t.Optional[exp.Expression]: 3809 this = self._parse_column() 3810 if isinstance(this, exp.Column): 3811 return this.this 3812 return this 3813 3814 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3815 3816 def _parse_join( 3817 self, skip_join_token: bool = False, parse_bracket: bool = False 3818 ) -> t.Optional[exp.Join]: 3819 if self._match(TokenType.COMMA): 3820 table = self._try_parse(self._parse_table) 3821 cross_join = self.expression(exp.Join, this=table) if table else None 3822 3823 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3824 cross_join.set("kind", "CROSS") 3825 3826 return cross_join 3827 3828 index = self._index 3829 method, side, kind = self._parse_join_parts() 3830 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3831 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3832 join_comments = self._prev_comments 3833 3834 if not skip_join_token and not join: 3835 self._retreat(index) 3836 kind = None 3837 method = None 3838 side = None 3839 3840 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3841 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3842 3843 if not skip_join_token and not join and not outer_apply and not cross_apply: 3844 return None 3845 3846 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3847 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3848 kwargs["expressions"] = self._parse_csv( 3849 lambda: self._parse_table(parse_bracket=parse_bracket) 3850 ) 3851 3852 if method: 3853 kwargs["method"] = method.text 3854 if side: 3855 kwargs["side"] = side.text 3856 if kind: 3857 kwargs["kind"] = kind.text 3858 if hint: 3859 kwargs["hint"] = hint 3860 3861 if self._match(TokenType.MATCH_CONDITION): 3862 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3863 3864 if self._match(TokenType.ON): 3865 kwargs["on"] = self._parse_assignment() 3866 elif self._match(TokenType.USING): 3867 kwargs["using"] = self._parse_using_identifiers() 3868 elif ( 3869 not method 3870 and not (outer_apply or cross_apply) 3871 and not isinstance(kwargs["this"], exp.Unnest) 3872 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3873 ): 3874 index = self._index 3875 joins: t.Optional[list] = list(self._parse_joins()) 3876 3877 if joins and self._match(TokenType.ON): 3878 kwargs["on"] = self._parse_assignment() 3879 elif joins and self._match(TokenType.USING): 3880 kwargs["using"] = self._parse_using_identifiers() 3881 else: 3882 joins = None 3883 self._retreat(index) 3884 3885 kwargs["this"].set("joins", joins if joins else None) 3886 3887 kwargs["pivots"] = self._parse_pivots() 3888 3889 comments = [c for token in (method, side, kind) if token for c in token.comments] 3890 comments = (join_comments or []) + comments 3891 3892 if ( 3893 self.ADD_JOIN_ON_TRUE 3894 and not kwargs.get("on") 3895 and not kwargs.get("using") 3896 and not kwargs.get("method") 3897 and kwargs.get("kind") in (None, "INNER", "OUTER") 3898 ): 3899 kwargs["on"] = exp.true() 3900 3901 return self.expression(exp.Join, comments=comments, **kwargs) 3902 3903 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3904 this = self._parse_assignment() 3905 3906 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3907 return this 3908 3909 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3910 return 
self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3911 3912 return this 3913 3914 def _parse_index_params(self) -> exp.IndexParameters: 3915 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3916 3917 if self._match(TokenType.L_PAREN, advance=False): 3918 columns = self._parse_wrapped_csv(self._parse_with_operator) 3919 else: 3920 columns = None 3921 3922 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3923 partition_by = self._parse_partition_by() 3924 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3925 tablespace = ( 3926 self._parse_var(any_token=True) 3927 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3928 else None 3929 ) 3930 where = self._parse_where() 3931 3932 on = self._parse_field() if self._match(TokenType.ON) else None 3933 3934 return self.expression( 3935 exp.IndexParameters, 3936 using=using, 3937 columns=columns, 3938 include=include, 3939 partition_by=partition_by, 3940 where=where, 3941 with_storage=with_storage, 3942 tablespace=tablespace, 3943 on=on, 3944 ) 3945 3946 def _parse_index( 3947 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3948 ) -> t.Optional[exp.Index]: 3949 if index or anonymous: 3950 unique = None 3951 primary = None 3952 amp = None 3953 3954 self._match(TokenType.ON) 3955 self._match(TokenType.TABLE) # hive 3956 table = self._parse_table_parts(schema=True) 3957 else: 3958 unique = self._match(TokenType.UNIQUE) 3959 primary = self._match_text_seq("PRIMARY") 3960 amp = self._match_text_seq("AMP") 3961 3962 if not self._match(TokenType.INDEX): 3963 return None 3964 3965 index = self._parse_id_var() 3966 table = None 3967 3968 params = self._parse_index_params() 3969 3970 return self.expression( 3971 exp.Index, 3972 this=index, 3973 table=table, 3974 unique=unique, 3975 primary=primary, 3976 amp=amp, 3977 params=params, 3978 ) 3979 3980 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3981 hints: t.List[exp.Expression] = [] 3982 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3983 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3984 hints.append( 3985 self.expression( 3986 exp.WithTableHint, 3987 expressions=self._parse_csv( 3988 lambda: self._parse_function() or self._parse_var(any_token=True) 3989 ), 3990 ) 3991 ) 3992 self._match_r_paren() 3993 else: 3994 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3995 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3996 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3997 3998 self._match_set((TokenType.INDEX, TokenType.KEY)) 3999 if self._match(TokenType.FOR): 4000 hint.set("target", self._advance_any() and self._prev.text.upper()) 4001 4002 hint.set("expressions", self._parse_wrapped_id_vars()) 4003 hints.append(hint) 4004 4005 return hints or None 4006 4007 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4008 return ( 4009 (not schema and self._parse_function(optional_parens=False)) 4010 or self._parse_id_var(any_token=False) 4011 or self._parse_string_as_identifier() 4012 or self._parse_placeholder() 4013 ) 4014 4015 def _parse_table_parts( 4016 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4017 ) -> exp.Table: 4018 catalog = None 4019 db = None 4020 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4021 4022 while self._match(TokenType.DOT): 4023 
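            # Editor's note: each DOT shifts the dotted parts one slot to the left,
            # so "cat.db.tbl" exits the loop with catalog="cat", db="db", table="tbl";
            # names with more than three parts fold the remainder into nested exp.Dot
            # nodes below.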
if catalog: 4024 # This allows nesting the table in arbitrarily many dot expressions if needed 4025 table = self.expression( 4026 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4027 ) 4028 else: 4029 catalog = db 4030 db = table 4031 # "" used for tsql FROM a..b case 4032 table = self._parse_table_part(schema=schema) or "" 4033 4034 if ( 4035 wildcard 4036 and self._is_connected() 4037 and (isinstance(table, exp.Identifier) or not table) 4038 and self._match(TokenType.STAR) 4039 ): 4040 if isinstance(table, exp.Identifier): 4041 table.args["this"] += "*" 4042 else: 4043 table = exp.Identifier(this="*") 4044 4045 # We bubble up comments from the Identifier to the Table 4046 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4047 4048 if is_db_reference: 4049 catalog = db 4050 db = table 4051 table = None 4052 4053 if not table and not is_db_reference: 4054 self.raise_error(f"Expected table name but got {self._curr}") 4055 if not db and is_db_reference: 4056 self.raise_error(f"Expected database name but got {self._curr}") 4057 4058 table = self.expression( 4059 exp.Table, 4060 comments=comments, 4061 this=table, 4062 db=db, 4063 catalog=catalog, 4064 ) 4065 4066 changes = self._parse_changes() 4067 if changes: 4068 table.set("changes", changes) 4069 4070 at_before = self._parse_historical_data() 4071 if at_before: 4072 table.set("when", at_before) 4073 4074 pivots = self._parse_pivots() 4075 if pivots: 4076 table.set("pivots", pivots) 4077 4078 return table 4079 4080 def _parse_table( 4081 self, 4082 schema: bool = False, 4083 joins: bool = False, 4084 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4085 parse_bracket: bool = False, 4086 is_db_reference: bool = False, 4087 parse_partition: bool = False, 4088 consume_pipe: bool = False, 4089 ) -> t.Optional[exp.Expression]: 4090 lateral = self._parse_lateral() 4091 if lateral: 4092 return lateral 4093 4094 unnest = self._parse_unnest() 4095 if unnest: 4096 return unnest 4097 4098 values = self._parse_derived_table_values() 4099 if values: 4100 return values 4101 4102 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4103 if subquery: 4104 if not subquery.args.get("pivots"): 4105 subquery.set("pivots", self._parse_pivots()) 4106 return subquery 4107 4108 bracket = parse_bracket and self._parse_bracket(None) 4109 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4110 4111 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4112 self._parse_table 4113 ) 4114 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4115 4116 only = self._match(TokenType.ONLY) 4117 4118 this = t.cast( 4119 exp.Expression, 4120 bracket 4121 or rows_from 4122 or self._parse_bracket( 4123 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4124 ), 4125 ) 4126 4127 if only: 4128 this.set("only", only) 4129 4130 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4131 self._match_text_seq("*") 4132 4133 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4134 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4135 this.set("partition", self._parse_partition()) 4136 4137 if schema: 4138 return self._parse_schema(this=this) 4139 4140 version = self._parse_version() 4141 4142 if version: 4143 this.set("version", version) 4144 4145 if self.dialect.ALIAS_POST_TABLESAMPLE: 4146 this.set("sample", self._parse_table_sample()) 4147 
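        # Editor's note: dialects with ALIAS_POST_TABLESAMPLE (e.g. Hive) put the
        # sample clause before the alias, as in "FROM t TABLESAMPLE (...) x", hence
        # the early sample parse above; every other dialect is handled further down.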
4148 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4149 if alias: 4150 this.set("alias", alias) 4151 4152 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4153 return self.expression( 4154 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4155 ) 4156 4157 this.set("hints", self._parse_table_hints()) 4158 4159 if not this.args.get("pivots"): 4160 this.set("pivots", self._parse_pivots()) 4161 4162 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4163 this.set("sample", self._parse_table_sample()) 4164 4165 if joins: 4166 for join in self._parse_joins(): 4167 this.append("joins", join) 4168 4169 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4170 this.set("ordinality", True) 4171 this.set("alias", self._parse_table_alias()) 4172 4173 return this 4174 4175 def _parse_version(self) -> t.Optional[exp.Version]: 4176 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4177 this = "TIMESTAMP" 4178 elif self._match(TokenType.VERSION_SNAPSHOT): 4179 this = "VERSION" 4180 else: 4181 return None 4182 4183 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4184 kind = self._prev.text.upper() 4185 start = self._parse_bitwise() 4186 self._match_texts(("TO", "AND")) 4187 end = self._parse_bitwise() 4188 expression: t.Optional[exp.Expression] = self.expression( 4189 exp.Tuple, expressions=[start, end] 4190 ) 4191 elif self._match_text_seq("CONTAINED", "IN"): 4192 kind = "CONTAINED IN" 4193 expression = self.expression( 4194 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4195 ) 4196 elif self._match(TokenType.ALL): 4197 kind = "ALL" 4198 expression = None 4199 else: 4200 self._match_text_seq("AS", "OF") 4201 kind = "AS OF" 4202 expression = self._parse_type() 4203 4204 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4205 4206 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4207 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4208 index = self._index 4209 historical_data = None 4210 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4211 this = self._prev.text.upper() 4212 kind = ( 4213 self._match(TokenType.L_PAREN) 4214 and self._match_texts(self.HISTORICAL_DATA_KIND) 4215 and self._prev.text.upper() 4216 ) 4217 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4218 4219 if expression: 4220 self._match_r_paren() 4221 historical_data = self.expression( 4222 exp.HistoricalData, this=this, kind=kind, expression=expression 4223 ) 4224 else: 4225 self._retreat(index) 4226 4227 return historical_data 4228 4229 def _parse_changes(self) -> t.Optional[exp.Changes]: 4230 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4231 return None 4232 4233 information = self._parse_var(any_token=True) 4234 self._match_r_paren() 4235 4236 return self.expression( 4237 exp.Changes, 4238 information=information, 4239 at_before=self._parse_historical_data(), 4240 end=self._parse_historical_data(), 4241 ) 4242 4243 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4244 if not self._match(TokenType.UNNEST): 4245 return None 4246 4247 expressions = self._parse_wrapped_csv(self._parse_equality) 4248 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4249 4250 alias = self._parse_table_alias() if with_alias else None 4251 4252 if alias: 4253 if self.dialect.UNNEST_COLUMN_ONLY: 4254 if alias.args.get("columns"): 4255 self.raise_error("Unexpected extra column alias in unnest.") 4256 4257 
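                # Editor's note: under UNNEST_COLUMN_ONLY (e.g. BigQuery), an UNNEST
                # alias names the single produced column rather than a derived table,
                # so the table alias parsed above is reinterpreted as a column alias.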
alias.set("columns", [alias.this]) 4258 alias.set("this", None) 4259 4260 columns = alias.args.get("columns") or [] 4261 if offset and len(expressions) < len(columns): 4262 offset = columns.pop() 4263 4264 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4265 self._match(TokenType.ALIAS) 4266 offset = self._parse_id_var( 4267 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4268 ) or exp.to_identifier("offset") 4269 4270 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4271 4272 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4273 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4274 if not is_derived and not ( 4275 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4276 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4277 ): 4278 return None 4279 4280 expressions = self._parse_csv(self._parse_value) 4281 alias = self._parse_table_alias() 4282 4283 if is_derived: 4284 self._match_r_paren() 4285 4286 return self.expression( 4287 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4288 ) 4289 4290 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4291 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4292 as_modifier and self._match_text_seq("USING", "SAMPLE") 4293 ): 4294 return None 4295 4296 bucket_numerator = None 4297 bucket_denominator = None 4298 bucket_field = None 4299 percent = None 4300 size = None 4301 seed = None 4302 4303 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4304 matched_l_paren = self._match(TokenType.L_PAREN) 4305 4306 if self.TABLESAMPLE_CSV: 4307 num = None 4308 expressions = self._parse_csv(self._parse_primary) 4309 else: 4310 expressions = None 4311 num = ( 4312 self._parse_factor() 4313 if self._match(TokenType.NUMBER, advance=False) 4314 else self._parse_primary() or self._parse_placeholder() 4315 ) 4316 4317 if self._match_text_seq("BUCKET"): 4318 bucket_numerator = self._parse_number() 4319 self._match_text_seq("OUT", "OF") 4320 bucket_denominator = bucket_denominator = self._parse_number() 4321 self._match(TokenType.ON) 4322 bucket_field = self._parse_field() 4323 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4324 percent = num 4325 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4326 size = num 4327 else: 4328 percent = num 4329 4330 if matched_l_paren: 4331 self._match_r_paren() 4332 4333 if self._match(TokenType.L_PAREN): 4334 method = self._parse_var(upper=True) 4335 seed = self._match(TokenType.COMMA) and self._parse_number() 4336 self._match_r_paren() 4337 elif self._match_texts(("SEED", "REPEATABLE")): 4338 seed = self._parse_wrapped(self._parse_number) 4339 4340 if not method and self.DEFAULT_SAMPLING_METHOD: 4341 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4342 4343 return self.expression( 4344 exp.TableSample, 4345 expressions=expressions, 4346 method=method, 4347 bucket_numerator=bucket_numerator, 4348 bucket_denominator=bucket_denominator, 4349 bucket_field=bucket_field, 4350 percent=percent, 4351 size=size, 4352 seed=seed, 4353 ) 4354 4355 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4356 return list(iter(self._parse_pivot, None)) or None 4357 4358 def _parse_joins(self) -> t.Iterator[exp.Join]: 4359 return iter(self._parse_join, None) 4360 4361 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4362 if not self._match(TokenType.INTO): 4363 return None 
4364 4365 return self.expression( 4366 exp.UnpivotColumns, 4367 this=self._match_text_seq("NAME") and self._parse_column(), 4368 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4369 ) 4370 4371 # https://duckdb.org/docs/sql/statements/pivot 4372 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4373 def _parse_on() -> t.Optional[exp.Expression]: 4374 this = self._parse_bitwise() 4375 4376 if self._match(TokenType.IN): 4377 # PIVOT ... ON col IN (row_val1, row_val2) 4378 return self._parse_in(this) 4379 if self._match(TokenType.ALIAS, advance=False): 4380 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4381 return self._parse_alias(this) 4382 4383 return this 4384 4385 this = self._parse_table() 4386 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4387 into = self._parse_unpivot_columns() 4388 using = self._match(TokenType.USING) and self._parse_csv( 4389 lambda: self._parse_alias(self._parse_function()) 4390 ) 4391 group = self._parse_group() 4392 4393 return self.expression( 4394 exp.Pivot, 4395 this=this, 4396 expressions=expressions, 4397 using=using, 4398 group=group, 4399 unpivot=is_unpivot, 4400 into=into, 4401 ) 4402 4403 def _parse_pivot_in(self) -> exp.In: 4404 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4405 this = self._parse_select_or_expression() 4406 4407 self._match(TokenType.ALIAS) 4408 alias = self._parse_bitwise() 4409 if alias: 4410 if isinstance(alias, exp.Column) and not alias.db: 4411 alias = alias.this 4412 return self.expression(exp.PivotAlias, this=this, alias=alias) 4413 4414 return this 4415 4416 value = self._parse_column() 4417 4418 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4419 self.raise_error("Expecting IN (") 4420 4421 if self._match(TokenType.ANY): 4422 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4423 else: 4424 exprs = self._parse_csv(_parse_aliased_expression) 4425 4426 self._match_r_paren() 4427 return self.expression(exp.In, this=value, expressions=exprs) 4428 4429 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4430 func = self._parse_function() 4431 if not func: 4432 if self._prev and self._prev.token_type == TokenType.COMMA: 4433 return None 4434 self.raise_error("Expecting an aggregation function in PIVOT") 4435 4436 return self._parse_alias(func) 4437 4438 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4439 index = self._index 4440 include_nulls = None 4441 4442 if self._match(TokenType.PIVOT): 4443 unpivot = False 4444 elif self._match(TokenType.UNPIVOT): 4445 unpivot = True 4446 4447 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4448 if self._match_text_seq("INCLUDE", "NULLS"): 4449 include_nulls = True 4450 elif self._match_text_seq("EXCLUDE", "NULLS"): 4451 include_nulls = False 4452 else: 4453 return None 4454 4455 expressions = [] 4456 4457 if not self._match(TokenType.L_PAREN): 4458 self._retreat(index) 4459 return None 4460 4461 if unpivot: 4462 expressions = self._parse_csv(self._parse_column) 4463 else: 4464 expressions = self._parse_csv(self._parse_pivot_aggregation) 4465 4466 if not expressions: 4467 self.raise_error("Failed to parse PIVOT's aggregation list") 4468 4469 if not self._match(TokenType.FOR): 4470 self.raise_error("Expecting FOR") 4471 4472 fields = [] 4473 while True: 4474 field = self._try_parse(self._parse_pivot_in) 4475 if not field: 4476 break 4477 fields.append(field) 4478 4479 
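        # Editor's note: DEFAULT ON NULL is Snowflake's fallback for empty pivot
        # cells, e.g. PIVOT(SUM(amount) FOR month IN ('JAN', 'FEB') DEFAULT ON NULL (0)).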
default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4480 self._parse_bitwise 4481 ) 4482 4483 group = self._parse_group() 4484 4485 self._match_r_paren() 4486 4487 pivot = self.expression( 4488 exp.Pivot, 4489 expressions=expressions, 4490 fields=fields, 4491 unpivot=unpivot, 4492 include_nulls=include_nulls, 4493 default_on_null=default_on_null, 4494 group=group, 4495 ) 4496 4497 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4498 pivot.set("alias", self._parse_table_alias()) 4499 4500 if not unpivot: 4501 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4502 4503 columns: t.List[exp.Expression] = [] 4504 all_fields = [] 4505 for pivot_field in pivot.fields: 4506 pivot_field_expressions = pivot_field.expressions 4507 4508 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4509 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4510 continue 4511 4512 all_fields.append( 4513 [ 4514 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4515 for fld in pivot_field_expressions 4516 ] 4517 ) 4518 4519 if all_fields: 4520 if names: 4521 all_fields.append(names) 4522 4523 # Generate all possible combinations of the pivot columns 4524 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4525 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4526 for fld_parts_tuple in itertools.product(*all_fields): 4527 fld_parts = list(fld_parts_tuple) 4528 4529 if names and self.PREFIXED_PIVOT_COLUMNS: 4530 # Move the "name" to the front of the list 4531 fld_parts.insert(0, fld_parts.pop(-1)) 4532 4533 columns.append(exp.to_identifier("_".join(fld_parts))) 4534 4535 pivot.set("columns", columns) 4536 4537 return pivot 4538 4539 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4540 return [agg.alias for agg in aggregations if agg.alias] 4541 4542 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4543 if not skip_where_token and not self._match(TokenType.PREWHERE): 4544 return None 4545 4546 return self.expression( 4547 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4548 ) 4549 4550 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4551 if not skip_where_token and not self._match(TokenType.WHERE): 4552 return None 4553 4554 return self.expression( 4555 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4556 ) 4557 4558 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4559 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4560 return None 4561 comments = self._prev_comments 4562 4563 elements: t.Dict[str, t.Any] = defaultdict(list) 4564 4565 if self._match(TokenType.ALL): 4566 elements["all"] = True 4567 elif self._match(TokenType.DISTINCT): 4568 elements["all"] = False 4569 4570 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4571 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4572 4573 while True: 4574 index = self._index 4575 4576 elements["expressions"].extend( 4577 self._parse_csv( 4578 lambda: None 4579 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4580 else self._parse_assignment() 4581 ) 4582 ) 4583 4584 before_with_index = self._index 4585 with_prefix = self._match(TokenType.WITH) 4586 4587 if 
self._match(TokenType.ROLLUP): 4588 elements["rollup"].append( 4589 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4590 ) 4591 elif self._match(TokenType.CUBE): 4592 elements["cube"].append( 4593 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4594 ) 4595 elif self._match(TokenType.GROUPING_SETS): 4596 elements["grouping_sets"].append( 4597 self.expression( 4598 exp.GroupingSets, 4599 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4600 ) 4601 ) 4602 elif self._match_text_seq("TOTALS"): 4603 elements["totals"] = True # type: ignore 4604 4605 if before_with_index <= self._index <= before_with_index + 1: 4606 self._retreat(before_with_index) 4607 break 4608 4609 if index == self._index: 4610 break 4611 4612 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4613 4614 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4615 return self.expression( 4616 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4617 ) 4618 4619 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4620 if self._match(TokenType.L_PAREN): 4621 grouping_set = self._parse_csv(self._parse_bitwise) 4622 self._match_r_paren() 4623 return self.expression(exp.Tuple, expressions=grouping_set) 4624 4625 return self._parse_column() 4626 4627 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4628 if not skip_having_token and not self._match(TokenType.HAVING): 4629 return None 4630 return self.expression( 4631 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4632 ) 4633 4634 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4635 if not self._match(TokenType.QUALIFY): 4636 return None 4637 return self.expression(exp.Qualify, this=self._parse_assignment()) 4638 4639 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4640 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4641 exp.Prior, this=self._parse_bitwise() 4642 ) 4643 connect = self._parse_assignment() 4644 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4645 return connect 4646 4647 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4648 if skip_start_token: 4649 start = None 4650 elif self._match(TokenType.START_WITH): 4651 start = self._parse_assignment() 4652 else: 4653 return None 4654 4655 self._match(TokenType.CONNECT_BY) 4656 nocycle = self._match_text_seq("NOCYCLE") 4657 connect = self._parse_connect_with_prior() 4658 4659 if not start and self._match(TokenType.START_WITH): 4660 start = self._parse_assignment() 4661 4662 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4663 4664 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4665 this = self._parse_id_var(any_token=True) 4666 if self._match(TokenType.ALIAS): 4667 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4668 return this 4669 4670 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4671 if self._match_text_seq("INTERPOLATE"): 4672 return self._parse_wrapped_csv(self._parse_name_as_expression) 4673 return None 4674 4675 def _parse_order( 4676 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4677 ) -> t.Optional[exp.Expression]: 4678 siblings = None 4679 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4680 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4681 return this 4682 4683 siblings = True 4684 
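        # Editor's note: ORDER SIBLINGS BY is Oracle's hierarchical-query variant;
        # it orders rows within each level of a CONNECT BY tree rather than globally.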
4685 return self.expression( 4686 exp.Order, 4687 comments=self._prev_comments, 4688 this=this, 4689 expressions=self._parse_csv(self._parse_ordered), 4690 siblings=siblings, 4691 ) 4692 4693 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4694 if not self._match(token): 4695 return None 4696 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4697 4698 def _parse_ordered( 4699 self, parse_method: t.Optional[t.Callable] = None 4700 ) -> t.Optional[exp.Ordered]: 4701 this = parse_method() if parse_method else self._parse_assignment() 4702 if not this: 4703 return None 4704 4705 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4706 this = exp.var("ALL") 4707 4708 asc = self._match(TokenType.ASC) 4709 desc = self._match(TokenType.DESC) or (asc and False) 4710 4711 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4712 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4713 4714 nulls_first = is_nulls_first or False 4715 explicitly_null_ordered = is_nulls_first or is_nulls_last 4716 4717 if ( 4718 not explicitly_null_ordered 4719 and ( 4720 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4721 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4722 ) 4723 and self.dialect.NULL_ORDERING != "nulls_are_last" 4724 ): 4725 nulls_first = True 4726 4727 if self._match_text_seq("WITH", "FILL"): 4728 with_fill = self.expression( 4729 exp.WithFill, 4730 **{ # type: ignore 4731 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4732 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4733 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4734 "interpolate": self._parse_interpolate(), 4735 }, 4736 ) 4737 else: 4738 with_fill = None 4739 4740 return self.expression( 4741 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4742 ) 4743 4744 def _parse_limit_options(self) -> exp.LimitOptions: 4745 percent = self._match(TokenType.PERCENT) 4746 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4747 self._match_text_seq("ONLY") 4748 with_ties = self._match_text_seq("WITH", "TIES") 4749 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4750 4751 def _parse_limit( 4752 self, 4753 this: t.Optional[exp.Expression] = None, 4754 top: bool = False, 4755 skip_limit_token: bool = False, 4756 ) -> t.Optional[exp.Expression]: 4757 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4758 comments = self._prev_comments 4759 if top: 4760 limit_paren = self._match(TokenType.L_PAREN) 4761 expression = self._parse_term() if limit_paren else self._parse_number() 4762 4763 if limit_paren: 4764 self._match_r_paren() 4765 4766 limit_options = self._parse_limit_options() 4767 else: 4768 limit_options = None 4769 expression = self._parse_term() 4770 4771 if self._match(TokenType.COMMA): 4772 offset = expression 4773 expression = self._parse_term() 4774 else: 4775 offset = None 4776 4777 limit_exp = self.expression( 4778 exp.Limit, 4779 this=this, 4780 expression=expression, 4781 offset=offset, 4782 comments=comments, 4783 limit_options=limit_options, 4784 expressions=self._parse_limit_by(), 4785 ) 4786 4787 return limit_exp 4788 4789 if self._match(TokenType.FETCH): 4790 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4791 direction = self._prev.text.upper() if direction else "FIRST" 4792 4793 count = self._parse_field(tokens=self.FETCH_TOKENS) 4794 4795 return 
self.expression( 4796 exp.Fetch, 4797 direction=direction, 4798 count=count, 4799 limit_options=self._parse_limit_options(), 4800 ) 4801 4802 return this 4803 4804 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4805 if not self._match(TokenType.OFFSET): 4806 return this 4807 4808 count = self._parse_term() 4809 self._match_set((TokenType.ROW, TokenType.ROWS)) 4810 4811 return self.expression( 4812 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4813 ) 4814 4815 def _can_parse_limit_or_offset(self) -> bool: 4816 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4817 return False 4818 4819 index = self._index 4820 result = bool( 4821 self._try_parse(self._parse_limit, retreat=True) 4822 or self._try_parse(self._parse_offset, retreat=True) 4823 ) 4824 self._retreat(index) 4825 return result 4826 4827 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4828 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4829 4830 def _parse_locks(self) -> t.List[exp.Lock]: 4831 locks = [] 4832 while True: 4833 update, key = None, None 4834 if self._match_text_seq("FOR", "UPDATE"): 4835 update = True 4836 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4837 "LOCK", "IN", "SHARE", "MODE" 4838 ): 4839 update = False 4840 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4841 update, key = False, True 4842 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4843 update, key = True, True 4844 else: 4845 break 4846 4847 expressions = None 4848 if self._match_text_seq("OF"): 4849 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4850 4851 wait: t.Optional[bool | exp.Expression] = None 4852 if self._match_text_seq("NOWAIT"): 4853 wait = True 4854 elif self._match_text_seq("WAIT"): 4855 wait = self._parse_primary() 4856 elif self._match_text_seq("SKIP", "LOCKED"): 4857 wait = False 4858 4859 locks.append( 4860 self.expression( 4861 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4862 ) 4863 ) 4864 4865 return locks 4866 4867 def parse_set_operation( 4868 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4869 ) -> t.Optional[exp.Expression]: 4870 start = self._index 4871 _, side_token, kind_token = self._parse_join_parts() 4872 4873 side = side_token.text if side_token else None 4874 kind = kind_token.text if kind_token else None 4875 4876 if not self._match_set(self.SET_OPERATIONS): 4877 self._retreat(start) 4878 return None 4879 4880 token_type = self._prev.token_type 4881 4882 if token_type == TokenType.UNION: 4883 operation: t.Type[exp.SetOperation] = exp.Union 4884 elif token_type == TokenType.EXCEPT: 4885 operation = exp.Except 4886 else: 4887 operation = exp.Intersect 4888 4889 comments = self._prev.comments 4890 4891 if self._match(TokenType.DISTINCT): 4892 distinct: t.Optional[bool] = True 4893 elif self._match(TokenType.ALL): 4894 distinct = False 4895 else: 4896 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4897 if distinct is None: 4898 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4899 4900 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4901 "STRICT", "CORRESPONDING" 4902 ) 4903 if self._match_text_seq("CORRESPONDING"): 4904 by_name = True 4905 if not side and not kind: 4906 kind = "INNER" 4907 4908 on_column_list = None 4909 if by_name and self._match_texts(("ON", "BY")): 4910 on_column_list = 
self._parse_wrapped_csv(self._parse_column) 4911 4912 expression = self._parse_select( 4913 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4914 ) 4915 4916 return self.expression( 4917 operation, 4918 comments=comments, 4919 this=this, 4920 distinct=distinct, 4921 by_name=by_name, 4922 expression=expression, 4923 side=side, 4924 kind=kind, 4925 on=on_column_list, 4926 ) 4927 4928 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4929 while this: 4930 setop = self.parse_set_operation(this) 4931 if not setop: 4932 break 4933 this = setop 4934 4935 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4936 expression = this.expression 4937 4938 if expression: 4939 for arg in self.SET_OP_MODIFIERS: 4940 expr = expression.args.get(arg) 4941 if expr: 4942 this.set(arg, expr.pop()) 4943 4944 return this 4945 4946 def _parse_expression(self) -> t.Optional[exp.Expression]: 4947 return self._parse_alias(self._parse_assignment()) 4948 4949 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4950 this = self._parse_disjunction() 4951 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4952 # This allows us to parse <non-identifier token> := <expr> 4953 this = exp.column( 4954 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4955 ) 4956 4957 while self._match_set(self.ASSIGNMENT): 4958 if isinstance(this, exp.Column) and len(this.parts) == 1: 4959 this = this.this 4960 4961 this = self.expression( 4962 self.ASSIGNMENT[self._prev.token_type], 4963 this=this, 4964 comments=self._prev_comments, 4965 expression=self._parse_assignment(), 4966 ) 4967 4968 return this 4969 4970 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4971 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4972 4973 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4974 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4975 4976 def _parse_equality(self) -> t.Optional[exp.Expression]: 4977 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4978 4979 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4980 return self._parse_tokens(self._parse_range, self.COMPARISON) 4981 4982 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4983 this = this or self._parse_bitwise() 4984 negate = self._match(TokenType.NOT) 4985 4986 if self._match_set(self.RANGE_PARSERS): 4987 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4988 if not expression: 4989 return this 4990 4991 this = expression 4992 elif self._match(TokenType.ISNULL): 4993 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4994 4995 # Postgres supports ISNULL and NOTNULL for conditions. 
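# e.g. "x NOTNULL" is parsed by the branch below into NOT (x IS NULL), mirroring the ISNULL case above.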
4996 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4997 if self._match(TokenType.NOTNULL): 4998 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4999 this = self.expression(exp.Not, this=this) 5000 5001 if negate: 5002 this = self._negate_range(this) 5003 5004 if self._match(TokenType.IS): 5005 this = self._parse_is(this) 5006 5007 return this 5008 5009 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5010 if not this: 5011 return this 5012 5013 return self.expression(exp.Not, this=this) 5014 5015 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5016 index = self._index - 1 5017 negate = self._match(TokenType.NOT) 5018 5019 if self._match_text_seq("DISTINCT", "FROM"): 5020 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5021 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5022 5023 if self._match(TokenType.JSON): 5024 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5025 5026 if self._match_text_seq("WITH"): 5027 _with = True 5028 elif self._match_text_seq("WITHOUT"): 5029 _with = False 5030 else: 5031 _with = None 5032 5033 unique = self._match(TokenType.UNIQUE) 5034 self._match_text_seq("KEYS") 5035 expression: t.Optional[exp.Expression] = self.expression( 5036 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5037 ) 5038 else: 5039 expression = self._parse_primary() or self._parse_null() 5040 if not expression: 5041 self._retreat(index) 5042 return None 5043 5044 this = self.expression(exp.Is, this=this, expression=expression) 5045 return self.expression(exp.Not, this=this) if negate else this 5046 5047 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5048 unnest = self._parse_unnest(with_alias=False) 5049 if unnest: 5050 this = self.expression(exp.In, this=this, unnest=unnest) 5051 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5052 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5053 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5054 5055 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5056 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5057 else: 5058 this = self.expression(exp.In, this=this, expressions=expressions) 5059 5060 if matched_l_paren: 5061 self._match_r_paren(this) 5062 elif not self._match(TokenType.R_BRACKET, expression=this): 5063 self.raise_error("Expecting ]") 5064 else: 5065 this = self.expression(exp.In, this=this, field=self._parse_column()) 5066 5067 return this 5068 5069 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5070 symmetric = None 5071 if self._match_text_seq("SYMMETRIC"): 5072 symmetric = True 5073 elif self._match_text_seq("ASYMMETRIC"): 5074 symmetric = False 5075 5076 low = self._parse_bitwise() 5077 self._match(TokenType.AND) 5078 high = self._parse_bitwise() 5079 5080 return self.expression( 5081 exp.Between, 5082 this=this, 5083 low=low, 5084 high=high, 5085 symmetric=symmetric, 5086 ) 5087 5088 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5089 if not self._match(TokenType.ESCAPE): 5090 return this 5091 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5092 5093 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5094 index = self._index 5095 5096 if not 
self._match(TokenType.INTERVAL) and match_interval: 5097 return None 5098 5099 if self._match(TokenType.STRING, advance=False): 5100 this = self._parse_primary() 5101 else: 5102 this = self._parse_term() 5103 5104 if not this or ( 5105 isinstance(this, exp.Column) 5106 and not this.table 5107 and not this.this.quoted 5108 and this.name.upper() in ("IS", "ROWS") 5109 ): 5110 self._retreat(index) 5111 return None 5112 5113 # handle day-time format interval span with omitted units: 5114 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5115 interval_span_units_omitted = None 5116 if ( 5117 this 5118 and this.is_string 5119 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5120 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5121 ): 5122 index = self._index 5123 5124 # Var "TO" Var 5125 first_unit = self._parse_var(any_token=True, upper=True) 5126 second_unit = None 5127 if first_unit and self._match_text_seq("TO"): 5128 second_unit = self._parse_var(any_token=True, upper=True) 5129 5130 interval_span_units_omitted = not (first_unit and second_unit) 5131 5132 self._retreat(index) 5133 5134 unit = ( 5135 None 5136 if interval_span_units_omitted 5137 else ( 5138 self._parse_function() 5139 or ( 5140 not self._match(TokenType.ALIAS, advance=False) 5141 and self._parse_var(any_token=True, upper=True) 5142 ) 5143 ) 5144 ) 5145 5146 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5147 # each INTERVAL expression into this canonical form so it's easy to transpile 5148 if this and this.is_number: 5149 this = exp.Literal.string(this.to_py()) 5150 elif this and this.is_string: 5151 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5152 if parts and unit: 5153 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5154 unit = None 5155 self._retreat(self._index - 1) 5156 5157 if len(parts) == 1: 5158 this = exp.Literal.string(parts[0][0]) 5159 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5160 5161 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5162 unit = self.expression( 5163 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5164 ) 5165 5166 interval = self.expression(exp.Interval, this=this, unit=unit) 5167 5168 index = self._index 5169 self._match(TokenType.PLUS) 5170 5171 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5172 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5173 return self.expression( 5174 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5175 ) 5176 5177 self._retreat(index) 5178 return interval 5179 5180 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5181 this = self._parse_term() 5182 5183 while True: 5184 if self._match_set(self.BITWISE): 5185 this = self.expression( 5186 self.BITWISE[self._prev.token_type], 5187 this=this, 5188 expression=self._parse_term(), 5189 ) 5190 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5191 this = self.expression( 5192 exp.DPipe, 5193 this=this, 5194 expression=self._parse_term(), 5195 safe=not self.dialect.STRICT_STRING_CONCAT, 5196 ) 5197 elif self._match(TokenType.DQMARK): 5198 this = self.expression( 5199 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5200 ) 5201 elif self._match_pair(TokenType.LT, TokenType.LT): 5202 this = self.expression( 5203 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5204 ) 5205 elif self._match_pair(TokenType.GT, TokenType.GT): 5206 this = self.expression( 5207 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5208 ) 5209 else: 5210 break 5211 5212 return this 5213 5214 def _parse_term(self) -> t.Optional[exp.Expression]: 5215 this = self._parse_factor() 5216 5217 while self._match_set(self.TERM): 5218 klass = self.TERM[self._prev.token_type] 5219 comments = self._prev_comments 5220 expression = self._parse_factor() 5221 5222 this = self.expression(klass, this=this, comments=comments, expression=expression) 5223 5224 if isinstance(this, exp.Collate): 5225 expr = this.expression 5226 5227 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5228 # fallback to Identifier / Var 5229 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5230 ident = expr.this 5231 if isinstance(ident, exp.Identifier): 5232 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5233 5234 return this 5235 5236 def _parse_factor(self) -> t.Optional[exp.Expression]: 5237 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5238 this = parse_method() 5239 5240 while self._match_set(self.FACTOR): 5241 klass = self.FACTOR[self._prev.token_type] 5242 comments = self._prev_comments 5243 expression = parse_method() 5244 5245 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5246 self._retreat(self._index - 1) 5247 return this 5248 5249 this = self.expression(klass, this=this, comments=comments, expression=expression) 5250 5251 if isinstance(this, exp.Div): 5252 this.args["typed"] = self.dialect.TYPED_DIVISION 5253 this.args["safe"] = self.dialect.SAFE_DIVISION 5254 5255 return this 5256 5257 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5258 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5259 5260 def _parse_unary(self) -> t.Optional[exp.Expression]: 5261 if self._match_set(self.UNARY_PARSERS): 5262 return self.UNARY_PARSERS[self._prev.token_type](self) 5263 return self._parse_at_time_zone(self._parse_type()) 5264 5265 def _parse_type( 5266 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5267 ) -> t.Optional[exp.Expression]: 5268 interval = parse_interval and self._parse_interval() 5269 if interval: 5270 return interval 5271 5272 index = self._index 5273 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5274 
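# Note (illustrative, inferred from the maybe_func/check_func peek further down): with
# check_func=True, a parenthesized match such as TIMESTAMP(3) only survives as a data type
# when a string literal follows it, as in the typed literal TIMESTAMP(3) '2020-01-01';
# otherwise it is assumed to be a function call and type parsing bails out.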
5275 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5276 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5277 if isinstance(data_type, exp.Cast): 5278 # This constructor can contain ops directly after it, for instance struct unnesting: 5279 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5280 return self._parse_column_ops(data_type) 5281 5282 if data_type: 5283 index2 = self._index 5284 this = self._parse_primary() 5285 5286 if isinstance(this, exp.Literal): 5287 literal = this.name 5288 this = self._parse_column_ops(this) 5289 5290 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5291 if parser: 5292 return parser(self, this, data_type) 5293 5294 if ( 5295 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5296 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5297 and TIME_ZONE_RE.search(literal) 5298 ): 5299 data_type = exp.DataType.build("TIMESTAMPTZ") 5300 5301 return self.expression(exp.Cast, this=this, to=data_type) 5302 5303 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5304 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5305 # 5306 # If the index difference here is greater than 1, that means the parser itself must have 5307 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5308 # 5309 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5310 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5311 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5312 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5313 # 5314 # In these cases, we don't really want to return the converted type, but instead retreat 5315 # and try to parse a Column or Identifier in the section below. 
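# Concretely: the six tokens DECIMAL ( 38 , 0 ) leave index2 - index == 6, so the parsed
# type is kept by the branch below, whereas a bare DECIMAL whose expressions were injected
# by a converter leaves index2 - index == 1 and we retreat to parse a column instead.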
5316 if data_type.expressions and index2 - index > 1: 5317 self._retreat(index2) 5318 return self._parse_column_ops(data_type) 5319 5320 self._retreat(index) 5321 5322 if fallback_to_identifier: 5323 return self._parse_id_var() 5324 5325 this = self._parse_column() 5326 return this and self._parse_column_ops(this) 5327 5328 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5329 this = self._parse_type() 5330 if not this: 5331 return None 5332 5333 if isinstance(this, exp.Column) and not this.table: 5334 this = exp.var(this.name.upper()) 5335 5336 return self.expression( 5337 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5338 ) 5339 5340 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5341 type_name = identifier.name 5342 5343 while self._match(TokenType.DOT): 5344 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5345 5346 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5347 5348 def _parse_types( 5349 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5350 ) -> t.Optional[exp.Expression]: 5351 index = self._index 5352 5353 this: t.Optional[exp.Expression] = None 5354 prefix = self._match_text_seq("SYSUDTLIB", ".") 5355 5356 if self._match_set(self.TYPE_TOKENS): 5357 type_token = self._prev.token_type 5358 else: 5359 type_token = None 5360 identifier = allow_identifiers and self._parse_id_var( 5361 any_token=False, tokens=(TokenType.VAR,) 5362 ) 5363 if isinstance(identifier, exp.Identifier): 5364 try: 5365 tokens = self.dialect.tokenize(identifier.name) 5366 except TokenError: 5367 tokens = None 5368 5369 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5370 type_token = tokens[0].token_type 5371 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5372 this = self._parse_user_defined_type(identifier) 5373 else: 5374 self._retreat(self._index - 1) 5375 return None 5376 else: 5377 return None 5378 5379 if type_token == TokenType.PSEUDO_TYPE: 5380 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5381 5382 if type_token == TokenType.OBJECT_IDENTIFIER: 5383 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5384 5385 # https://materialize.com/docs/sql/types/map/ 5386 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5387 key_type = self._parse_types( 5388 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5389 ) 5390 if not self._match(TokenType.FARROW): 5391 self._retreat(index) 5392 return None 5393 5394 value_type = self._parse_types( 5395 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5396 ) 5397 if not self._match(TokenType.R_BRACKET): 5398 self._retreat(index) 5399 return None 5400 5401 return exp.DataType( 5402 this=exp.DataType.Type.MAP, 5403 expressions=[key_type, value_type], 5404 nested=True, 5405 prefix=prefix, 5406 ) 5407 5408 nested = type_token in self.NESTED_TYPE_TOKENS 5409 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5410 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5411 expressions = None 5412 maybe_func = False 5413 5414 if self._match(TokenType.L_PAREN): 5415 if is_struct: 5416 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5417 elif nested: 5418 expressions = self._parse_csv( 5419 lambda: self._parse_types( 5420 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5421 ) 5422 ) 5423 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5424 this = expressions[0] 5425 this.set("nullable", True) 5426 self._match_r_paren() 5427 return this 5428 elif type_token in self.ENUM_TYPE_TOKENS: 5429 expressions = self._parse_csv(self._parse_equality) 5430 elif is_aggregate: 5431 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5432 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5433 ) 5434 if not func_or_ident: 5435 return None 5436 expressions = [func_or_ident] 5437 if self._match(TokenType.COMMA): 5438 expressions.extend( 5439 self._parse_csv( 5440 lambda: self._parse_types( 5441 check_func=check_func, 5442 schema=schema, 5443 allow_identifiers=allow_identifiers, 5444 ) 5445 ) 5446 ) 5447 else: 5448 expressions = self._parse_csv(self._parse_type_size) 5449 5450 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5451 if type_token == TokenType.VECTOR and len(expressions) == 2: 5452 expressions = self._parse_vector_expressions(expressions) 5453 5454 if not self._match(TokenType.R_PAREN): 5455 self._retreat(index) 5456 return None 5457 5458 maybe_func = True 5459 5460 values: t.Optional[t.List[exp.Expression]] = None 5461 5462 if nested and self._match(TokenType.LT): 5463 if is_struct: 5464 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5465 else: 5466 expressions = self._parse_csv( 5467 lambda: self._parse_types( 5468 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5469 ) 5470 ) 5471 5472 if not self._match(TokenType.GT): 5473 self.raise_error("Expecting >") 5474 5475 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5476 values = self._parse_csv(self._parse_assignment) 5477 if not values and is_struct: 5478 values = None 5479 self._retreat(self._index - 1) 5480 else: 5481 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5482 5483 if type_token in self.TIMESTAMPS: 5484 if self._match_text_seq("WITH", "TIME", "ZONE"): 5485 maybe_func = False 5486 tz_type = ( 5487 exp.DataType.Type.TIMETZ 5488 if type_token in self.TIMES 5489 else exp.DataType.Type.TIMESTAMPTZ 5490 ) 5491 this = exp.DataType(this=tz_type, expressions=expressions) 5492 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5493 maybe_func = False 5494 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5495 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5496 maybe_func = False 5497 elif type_token == TokenType.INTERVAL: 5498 unit = self._parse_var(upper=True) 5499 if unit: 5500 if self._match_text_seq("TO"): 5501 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5502 5503 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5504 else: 5505 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5506 elif type_token == TokenType.VOID: 5507 this = exp.DataType(this=exp.DataType.Type.NULL) 5508 5509 if maybe_func and check_func: 5510 index2 = self._index 5511 peek = self._parse_string() 5512 5513 if not peek: 5514 self._retreat(index) 5515 return None 5516 5517 self._retreat(index2) 5518 5519 if not this: 5520 if self._match_text_seq("UNSIGNED"): 5521 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5522 if not unsigned_type_token: 5523 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5524 5525 type_token = unsigned_type_token or type_token 5526 5527 # NULLABLE without parentheses can be a column (Presto/Trino) 5528 if type_token == 
TokenType.NULLABLE and not expressions: 5529 self._retreat(index) 5530 return None 5531 5532 this = exp.DataType( 5533 this=exp.DataType.Type[type_token.value], 5534 expressions=expressions, 5535 nested=nested, 5536 prefix=prefix, 5537 ) 5538 5539 # Empty arrays/structs are allowed 5540 if values is not None: 5541 cls = exp.Struct if is_struct else exp.Array 5542 this = exp.cast(cls(expressions=values), this, copy=False) 5543 5544 elif expressions: 5545 this.set("expressions", expressions) 5546 5547 # https://materialize.com/docs/sql/types/list/#type-name 5548 while self._match(TokenType.LIST): 5549 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5550 5551 index = self._index 5552 5553 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5554 matched_array = self._match(TokenType.ARRAY) 5555 5556 while self._curr: 5557 datatype_token = self._prev.token_type 5558 matched_l_bracket = self._match(TokenType.L_BRACKET) 5559 5560 if (not matched_l_bracket and not matched_array) or ( 5561 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5562 ): 5563 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5564 # not to be confused with the fixed size array parsing 5565 break 5566 5567 matched_array = False 5568 values = self._parse_csv(self._parse_assignment) or None 5569 if ( 5570 values 5571 and not schema 5572 and ( 5573 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5574 ) 5575 ): 5576 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5577 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5578 self._retreat(index) 5579 break 5580 5581 this = exp.DataType( 5582 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5583 ) 5584 self._match(TokenType.R_BRACKET) 5585 5586 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5587 converter = self.TYPE_CONVERTERS.get(this.this) 5588 if converter: 5589 this = converter(t.cast(exp.DataType, this)) 5590 5591 return this 5592 5593 def _parse_vector_expressions( 5594 self, expressions: t.List[exp.Expression] 5595 ) -> t.List[exp.Expression]: 5596 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5597 5598 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5599 index = self._index 5600 5601 if ( 5602 self._curr 5603 and self._next 5604 and self._curr.token_type in self.TYPE_TOKENS 5605 and self._next.token_type in self.TYPE_TOKENS 5606 ): 5607 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5608 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5609 this = self._parse_id_var() 5610 else: 5611 this = ( 5612 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5613 or self._parse_id_var() 5614 ) 5615 5616 self._match(TokenType.COLON) 5617 5618 if ( 5619 type_required 5620 and not isinstance(this, exp.DataType) 5621 and not self._match_set(self.TYPE_TOKENS, advance=False) 5622 ): 5623 self._retreat(index) 5624 return self._parse_types() 5625 5626 return self._parse_column_def(this) 5627 5628 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5629 if not self._match_text_seq("AT", "TIME", "ZONE"): 5630 return this 5631 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5632 5633 def _parse_column(self) -> t.Optional[exp.Expression]: 5634 this = self._parse_column_reference() 5635 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5636 5637 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5638 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5639 5640 return column 5641 5642 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5643 this = self._parse_field() 5644 if ( 5645 not this 5646 and self._match(TokenType.VALUES, advance=False) 5647 and self.VALUES_FOLLOWED_BY_PAREN 5648 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5649 ): 5650 this = self._parse_id_var() 5651 5652 if isinstance(this, exp.Identifier): 5653 # We bubble up comments from the Identifier to the Column 5654 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5655 5656 return this 5657 5658 def _parse_colon_as_variant_extract( 5659 self, this: t.Optional[exp.Expression] 5660 ) -> t.Optional[exp.Expression]: 5661 casts = [] 5662 json_path = [] 5663 escape = None 5664 5665 while self._match(TokenType.COLON): 5666 start_index = self._index 5667 5668 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5669 path = self._parse_column_ops( 5670 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5671 ) 5672 5673 # The cast :: operator has a lower precedence than the extraction operator :, so 5674 # we rearrange the AST appropriately to avoid casting the JSON path 5675 while isinstance(path, exp.Cast): 5676 casts.append(path.to) 5677 path = path.this 5678 5679 if casts: 5680 dcolon_offset = next( 5681 i 5682 for i, t in enumerate(self._tokens[start_index:]) 5683 if t.token_type == TokenType.DCOLON 5684 ) 5685 end_token = self._tokens[start_index + dcolon_offset - 1] 5686 else: 5687 end_token = self._prev 5688 5689 if path: 5690 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5691 # it'll roundtrip to a string literal in GET_PATH 5692 if isinstance(path, exp.Identifier) and path.quoted: 5693 escape = True 5694 5695 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5696 5697 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5698 # Databricks transforms it back to the colon/dot notation 5699 if json_path: 5700 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5701 5702 if json_path_expr: 5703 json_path_expr.set("escape", escape) 5704 5705 this = self.expression( 5706 exp.JSONExtract, 5707 this=this, 5708 expression=json_path_expr, 5709 variant_extract=True, 5710 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5711 ) 5712 5713 while casts: 5714 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5715 5716 return this 5717 5718 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5719 return self._parse_types() 5720 5721 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5722 this = self._parse_bracket(this) 5723 5724 while self._match_set(self.COLUMN_OPERATORS): 5725 op_token = self._prev.token_type 5726 op = self.COLUMN_OPERATORS.get(op_token) 5727 5728 if op_token in self.CAST_COLUMN_OPERATORS: 5729 field = self._parse_dcolon() 5730 if not field: 5731 self.raise_error("Expected type") 5732 elif op and self._curr: 5733 field = self._parse_column_reference() or self._parse_bitwise() 5734 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5735 field = self._parse_column_ops(field) 5736 else: 5737 field = self._parse_field(any_token=True, anonymous_func=True) 5738 5739 # Function calls can be qualified, e.g., x.y.FOO() 5740 # This converts the final AST to a series of Dots leading to the function call 5741 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5742 if isinstance(field, (exp.Func, exp.Window)) and this: 5743 this = this.transform( 5744 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5745 ) 5746 5747 if op: 5748 this = op(self, this, field) 5749 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5750 this = self.expression( 5751 exp.Column, 5752 comments=this.comments, 5753 this=field, 5754 table=this.this, 5755 db=this.args.get("table"), 5756 catalog=this.args.get("db"), 5757 ) 5758 elif isinstance(field, exp.Window): 5759 # Move the exp.Dot's to the window's function 5760 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5761 field.set("this", window_func) 5762 this = field 5763 else: 5764 this = self.expression(exp.Dot, this=this, expression=field) 5765 5766 if field and field.comments: 5767 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5768 5769 this = self._parse_bracket(this) 5770 5771 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5772 5773 def _parse_paren(self) -> t.Optional[exp.Expression]: 5774 if not self._match(TokenType.L_PAREN): 5775 return None 5776 5777 comments = self._prev_comments 5778 query = self._parse_select() 5779 5780 if query: 5781 expressions = [query] 5782 else: 5783 expressions = self._parse_expressions() 5784 5785 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5786 5787 if not this and self._match(TokenType.R_PAREN, advance=False): 5788 this = self.expression(exp.Tuple) 5789 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5790 this = self._parse_subquery(this=this, parse_alias=False) 5791 elif isinstance(this, exp.Subquery): 5792 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5793 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5794 this = self.expression(exp.Tuple, expressions=expressions) 5795 else: 5796 this = self.expression(exp.Paren, this=this) 5797 5798 if this: 5799 this.add_comments(comments) 5800 5801 self._match_r_paren(expression=this) 5802 5803 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5804 return self._parse_window(this) 5805 5806 return this 5807 5808 def _parse_primary(self) -> t.Optional[exp.Expression]: 5809 if self._match_set(self.PRIMARY_PARSERS): 5810 token_type = self._prev.token_type 5811 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5812 5813 if token_type == TokenType.STRING: 5814 expressions = [primary] 5815 while self._match(TokenType.STRING): 5816 expressions.append(exp.Literal.string(self._prev.text)) 5817 5818 if len(expressions) > 1: 5819 return self.expression(exp.Concat, expressions=expressions) 5820 5821 return primary 5822 5823 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5824 return exp.Literal.number(f"0.{self._prev.text}") 5825 5826 return self._parse_paren() 5827 5828 def _parse_field( 5829 self, 5830 any_token: bool = False, 5831 tokens: t.Optional[t.Collection[TokenType]] = None, 5832 anonymous_func: bool = False, 5833 ) -> t.Optional[exp.Expression]: 5834 if anonymous_func: 5835 field = ( 5836 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5837 or self._parse_primary() 5838 ) 5839 else: 5840 field = self._parse_primary() or self._parse_function( 5841 anonymous=anonymous_func, any_token=any_token 5842 ) 5843 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5844 5845 def _parse_function( 5846 self, 5847 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5848 anonymous: bool = False, 5849 optional_parens: bool = True, 5850 any_token: bool = False, 5851 ) -> t.Optional[exp.Expression]: 5852 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5853 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5854 fn_syntax = False 5855 if ( 5856 self._match(TokenType.L_BRACE, advance=False) 5857 and self._next 5858 and self._next.text.upper() == "FN" 5859 ): 5860 self._advance(2) 5861 fn_syntax = True 5862 5863 func = self._parse_function_call( 5864 functions=functions, 5865 anonymous=anonymous, 5866 optional_parens=optional_parens, 5867 any_token=any_token, 5868 ) 5869 5870 if fn_syntax: 5871 self._match(TokenType.R_BRACE) 5872 5873 return func 5874 5875 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5876 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5877 5878 def _parse_function_call( 5879 self, 5880 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5881 anonymous: bool = False, 5882 optional_parens: bool = True, 5883 any_token: bool = False, 5884 ) -> t.Optional[exp.Expression]: 5885 if not self._curr: 5886 return None 5887 5888 comments = self._curr.comments 5889 prev = self._prev 5890 token = self._curr 5891 token_type = self._curr.token_type 5892 this = self._curr.text 5893 upper = this.upper() 5894 5895 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5896 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5897 self._advance() 5898 return 
self._parse_window(parser(self)) 5899 5900 if not self._next or self._next.token_type != TokenType.L_PAREN: 5901 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5902 self._advance() 5903 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5904 5905 return None 5906 5907 if any_token: 5908 if token_type in self.RESERVED_TOKENS: 5909 return None 5910 elif token_type not in self.FUNC_TOKENS: 5911 return None 5912 5913 self._advance(2) 5914 5915 parser = self.FUNCTION_PARSERS.get(upper) 5916 if parser and not anonymous: 5917 this = parser(self) 5918 else: 5919 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5920 5921 if subquery_predicate: 5922 expr = None 5923 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5924 expr = self._parse_select() 5925 self._match_r_paren() 5926 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5927 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5928 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5929 self._advance(-1) 5930 expr = self._parse_bitwise() 5931 5932 if expr: 5933 return self.expression(subquery_predicate, comments=comments, this=expr) 5934 5935 if functions is None: 5936 functions = self.FUNCTIONS 5937 5938 function = functions.get(upper) 5939 known_function = function and not anonymous 5940 5941 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5942 args = self._parse_function_args(alias) 5943 5944 post_func_comments = self._curr and self._curr.comments 5945 if known_function and post_func_comments: 5946 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5947 # call we'll construct it as exp.Anonymous, even if it's "known" 5948 if any( 5949 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5950 for comment in post_func_comments 5951 ): 5952 known_function = False 5953 5954 if alias and known_function: 5955 args = self._kv_to_prop_eq(args) 5956 5957 if known_function: 5958 func_builder = t.cast(t.Callable, function) 5959 5960 if "dialect" in func_builder.__code__.co_varnames: 5961 func = func_builder(args, dialect=self.dialect) 5962 else: 5963 func = func_builder(args) 5964 5965 func = self.validate_expression(func, args) 5966 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5967 func.meta["name"] = this 5968 5969 this = func 5970 else: 5971 if token_type == TokenType.IDENTIFIER: 5972 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5973 5974 this = self.expression(exp.Anonymous, this=this, expressions=args) 5975 this = this.update_positions(token) 5976 5977 if isinstance(this, exp.Expression): 5978 this.add_comments(comments) 5979 5980 self._match_r_paren(this) 5981 return self._parse_window(this) 5982 5983 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5984 return expression 5985 5986 def _kv_to_prop_eq( 5987 self, expressions: t.List[exp.Expression], parse_map: bool = False 5988 ) -> t.List[exp.Expression]: 5989 transformed = [] 5990 5991 for index, e in enumerate(expressions): 5992 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5993 if isinstance(e, exp.Alias): 5994 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5995 5996 if not isinstance(e, exp.PropertyEQ): 5997 e = self.expression( 5998 exp.PropertyEQ, 5999 this=e.this if parse_map else exp.to_identifier(e.this.name), 6000 expression=e.expression, 6001 ) 6002 6003 if isinstance(e.this, exp.Column): 6004 e.this.replace(e.this.this) 
6005 else: 6006 e = self._to_prop_eq(e, index) 6007 6008 transformed.append(e) 6009 6010 return transformed 6011 6012 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6013 return self._parse_statement() 6014 6015 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6016 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6017 6018 def _parse_user_defined_function( 6019 self, kind: t.Optional[TokenType] = None 6020 ) -> t.Optional[exp.Expression]: 6021 this = self._parse_table_parts(schema=True) 6022 6023 if not self._match(TokenType.L_PAREN): 6024 return this 6025 6026 expressions = self._parse_csv(self._parse_function_parameter) 6027 self._match_r_paren() 6028 return self.expression( 6029 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6030 ) 6031 6032 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6033 literal = self._parse_primary() 6034 if literal: 6035 return self.expression(exp.Introducer, this=token.text, expression=literal) 6036 6037 return self._identifier_expression(token) 6038 6039 def _parse_session_parameter(self) -> exp.SessionParameter: 6040 kind = None 6041 this = self._parse_id_var() or self._parse_primary() 6042 6043 if this and self._match(TokenType.DOT): 6044 kind = this.name 6045 this = self._parse_var() or self._parse_primary() 6046 6047 return self.expression(exp.SessionParameter, this=this, kind=kind) 6048 6049 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6050 return self._parse_id_var() 6051 6052 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6053 index = self._index 6054 6055 if self._match(TokenType.L_PAREN): 6056 expressions = t.cast( 6057 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6058 ) 6059 6060 if not self._match(TokenType.R_PAREN): 6061 self._retreat(index) 6062 else: 6063 expressions = [self._parse_lambda_arg()] 6064 6065 if self._match_set(self.LAMBDAS): 6066 return self.LAMBDAS[self._prev.token_type](self, expressions) 6067 6068 self._retreat(index) 6069 6070 this: t.Optional[exp.Expression] 6071 6072 if self._match(TokenType.DISTINCT): 6073 this = self.expression( 6074 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6075 ) 6076 else: 6077 this = self._parse_select_or_expression(alias=alias) 6078 6079 return self._parse_limit( 6080 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6081 ) 6082 6083 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6084 index = self._index 6085 if not self._match(TokenType.L_PAREN): 6086 return this 6087 6088 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6089 # expr can be of both types 6090 if self._match_set(self.SELECT_START_TOKENS): 6091 self._retreat(index) 6092 return this 6093 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6094 self._match_r_paren() 6095 return self.expression(exp.Schema, this=this, expressions=args) 6096 6097 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6098 return self._parse_column_def(self._parse_field(any_token=True)) 6099 6100 def _parse_column_def( 6101 self, this: t.Optional[exp.Expression], computed_column: bool = True 6102 ) -> t.Optional[exp.Expression]: 6103 # column defs are not really columns, they're identifiers 6104 if isinstance(this, exp.Column): 6105 this = this.this 6106 6107 if not computed_column: 6108 self._match(TokenType.ALIAS) 6109 6110 kind = self._parse_types(schema=True) 6111 6112 if self._match_text_seq("FOR", "ORDINALITY"): 6113 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6114 6115 constraints: t.List[exp.Expression] = [] 6116 6117 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6118 ("ALIAS", "MATERIALIZED") 6119 ): 6120 persisted = self._prev.text.upper() == "MATERIALIZED" 6121 constraint_kind = exp.ComputedColumnConstraint( 6122 this=self._parse_assignment(), 6123 persisted=persisted or self._match_text_seq("PERSISTED"), 6124 data_type=exp.Var(this="AUTO") 6125 if self._match_text_seq("AUTO") 6126 else self._parse_types(), 6127 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6128 ) 6129 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6130 elif ( 6131 kind 6132 and self._match(TokenType.ALIAS, advance=False) 6133 and ( 6134 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6135 or (self._next and self._next.token_type == TokenType.L_PAREN) 6136 ) 6137 ): 6138 self._advance() 6139 constraints.append( 6140 self.expression( 6141 exp.ColumnConstraint, 6142 kind=exp.ComputedColumnConstraint( 6143 this=self._parse_disjunction(), 6144 persisted=self._match_texts(("STORED", "VIRTUAL")) 6145 and self._prev.text.upper() == "STORED", 6146 ), 6147 ) 6148 ) 6149 6150 while True: 6151 constraint = self._parse_column_constraint() 6152 if not constraint: 6153 break 6154 constraints.append(constraint) 6155 6156 if not kind and not constraints: 6157 return this 6158 6159 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6160 6161 def _parse_auto_increment( 6162 self, 6163 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6164 start = None 6165 increment = None 6166 order = None 6167 6168 if self._match(TokenType.L_PAREN, advance=False): 6169 args = self._parse_wrapped_csv(self._parse_bitwise) 6170 start = seq_get(args, 0) 6171 increment = seq_get(args, 1) 6172 elif self._match_text_seq("START"): 6173 start = self._parse_bitwise() 6174 self._match_text_seq("INCREMENT") 6175 increment = self._parse_bitwise() 6176 if self._match_text_seq("ORDER"): 6177 order = True 6178 elif self._match_text_seq("NOORDER"): 6179 order = False 6180 6181 if start and increment: 6182 return exp.GeneratedAsIdentityColumnConstraint( 6183 start=start, increment=increment, this=False, order=order 6184 ) 6185 6186 return exp.AutoIncrementColumnConstraint() 6187 6188 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6189 if not self._match_text_seq("REFRESH"): 6190 self._retreat(self._index - 1) 6191 return None 6192 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6193 6194 def _parse_compress(self) -> exp.CompressColumnConstraint: 6195 if self._match(TokenType.L_PAREN, advance=False): 6196 return self.expression( 6197 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6198 ) 6199 6200 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6201 6202 def _parse_generated_as_identity( 6203 self, 6204 ) -> ( 6205 exp.GeneratedAsIdentityColumnConstraint 6206 | exp.ComputedColumnConstraint 6207 | exp.GeneratedAsRowColumnConstraint 6208 ): 6209 if self._match_text_seq("BY", "DEFAULT"): 6210 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6211 this = self.expression( 6212 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6213 ) 6214 else: 6215 self._match_text_seq("ALWAYS") 6216 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6217 6218 self._match(TokenType.ALIAS) 6219 6220 if self._match_text_seq("ROW"): 6221 start = self._match_text_seq("START") 6222 if not start: 6223 self._match(TokenType.END) 6224 hidden = self._match_text_seq("HIDDEN") 6225 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6226 6227 identity = self._match_text_seq("IDENTITY") 6228 6229 if self._match(TokenType.L_PAREN): 6230 if self._match(TokenType.START_WITH): 6231 this.set("start", self._parse_bitwise()) 6232 if self._match_text_seq("INCREMENT", "BY"): 6233 this.set("increment", self._parse_bitwise()) 6234 if self._match_text_seq("MINVALUE"): 6235 this.set("minvalue", self._parse_bitwise()) 6236 if self._match_text_seq("MAXVALUE"): 6237 this.set("maxvalue", self._parse_bitwise()) 6238 6239 if self._match_text_seq("CYCLE"): 6240 this.set("cycle", True) 6241 elif self._match_text_seq("NO", "CYCLE"): 6242 this.set("cycle", False) 6243 6244 if not identity: 6245 this.set("expression", self._parse_range()) 6246 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6247 args = self._parse_csv(self._parse_bitwise) 6248 this.set("start", seq_get(args, 0)) 6249 this.set("increment", seq_get(args, 1)) 6250 6251 self._match_r_paren() 6252 6253 return this 6254 6255 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6256 self._match_text_seq("LENGTH") 6257 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6258 6259 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6260 if self._match_text_seq("NULL"): 6261 return self.expression(exp.NotNullColumnConstraint) 6262 if self._match_text_seq("CASESPECIFIC"): 6263 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6264 if self._match_text_seq("FOR", "REPLICATION"): 6265 return self.expression(exp.NotForReplicationColumnConstraint) 6266 6267 # Unconsume the `NOT` token 6268 self._retreat(self._index - 1) 6269 return None 6270 6271 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6272 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6273 6274 procedure_option_follows = ( 6275 self._match(TokenType.WITH, advance=False) 6276 and self._next 6277 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6278 ) 6279 6280 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6281 return self.expression( 6282 exp.ColumnConstraint, 6283 this=this, 6284 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6285 ) 6286 6287 return this 6288 6289 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6290 if not 
self._match(TokenType.CONSTRAINT): 6291 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6292 6293 return self.expression( 6294 exp.Constraint, 6295 this=self._parse_id_var(), 6296 expressions=self._parse_unnamed_constraints(), 6297 ) 6298 6299 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6300 constraints = [] 6301 while True: 6302 constraint = self._parse_unnamed_constraint() or self._parse_function() 6303 if not constraint: 6304 break 6305 constraints.append(constraint) 6306 6307 return constraints 6308 6309 def _parse_unnamed_constraint( 6310 self, constraints: t.Optional[t.Collection[str]] = None 6311 ) -> t.Optional[exp.Expression]: 6312 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6313 constraints or self.CONSTRAINT_PARSERS 6314 ): 6315 return None 6316 6317 constraint = self._prev.text.upper() 6318 if constraint not in self.CONSTRAINT_PARSERS: 6319 self.raise_error(f"No parser found for schema constraint {constraint}.") 6320 6321 return self.CONSTRAINT_PARSERS[constraint](self) 6322 6323 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6324 return self._parse_id_var(any_token=False) 6325 6326 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6327 self._match_texts(("KEY", "INDEX")) 6328 return self.expression( 6329 exp.UniqueColumnConstraint, 6330 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6331 this=self._parse_schema(self._parse_unique_key()), 6332 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6333 on_conflict=self._parse_on_conflict(), 6334 options=self._parse_key_constraint_options(), 6335 ) 6336 6337 def _parse_key_constraint_options(self) -> t.List[str]: 6338 options = [] 6339 while True: 6340 if not self._curr: 6341 break 6342 6343 if self._match(TokenType.ON): 6344 action = None 6345 on = self._advance_any() and self._prev.text 6346 6347 if self._match_text_seq("NO", "ACTION"): 6348 action = "NO ACTION" 6349 elif self._match_text_seq("CASCADE"): 6350 action = "CASCADE" 6351 elif self._match_text_seq("RESTRICT"): 6352 action = "RESTRICT" 6353 elif self._match_pair(TokenType.SET, TokenType.NULL): 6354 action = "SET NULL" 6355 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6356 action = "SET DEFAULT" 6357 else: 6358 self.raise_error("Invalid key constraint") 6359 6360 options.append(f"ON {on} {action}") 6361 else: 6362 var = self._parse_var_from_options( 6363 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6364 ) 6365 if not var: 6366 break 6367 options.append(var.name) 6368 6369 return options 6370 6371 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6372 if match and not self._match(TokenType.REFERENCES): 6373 return None 6374 6375 expressions = None 6376 this = self._parse_table(schema=True) 6377 options = self._parse_key_constraint_options() 6378 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6379 6380 def _parse_foreign_key(self) -> exp.ForeignKey: 6381 expressions = ( 6382 self._parse_wrapped_id_vars() 6383 if not self._match(TokenType.REFERENCES, advance=False) 6384 else None 6385 ) 6386 reference = self._parse_references() 6387 on_options = {} 6388 6389 while self._match(TokenType.ON): 6390 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6391 self.raise_error("Expected DELETE or UPDATE") 6392 6393 kind = self._prev.text.lower() 6394 6395 if self._match_text_seq("NO", "ACTION"): 6396 action = "NO ACTION" 6397 elif 
self._match(TokenType.SET): 6398 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6399 action = "SET " + self._prev.text.upper() 6400 else: 6401 self._advance() 6402 action = self._prev.text.upper() 6403 6404 on_options[kind] = action 6405 6406 return self.expression( 6407 exp.ForeignKey, 6408 expressions=expressions, 6409 reference=reference, 6410 options=self._parse_key_constraint_options(), 6411 **on_options, # type: ignore 6412 ) 6413 6414 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6415 return self._parse_ordered() or self._parse_field() 6416 6417 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6418 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6419 self._retreat(self._index - 1) 6420 return None 6421 6422 id_vars = self._parse_wrapped_id_vars() 6423 return self.expression( 6424 exp.PeriodForSystemTimeConstraint, 6425 this=seq_get(id_vars, 0), 6426 expression=seq_get(id_vars, 1), 6427 ) 6428 6429 def _parse_primary_key( 6430 self, wrapped_optional: bool = False, in_props: bool = False 6431 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6432 desc = ( 6433 self._match_set((TokenType.ASC, TokenType.DESC)) 6434 and self._prev.token_type == TokenType.DESC 6435 ) 6436 6437 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6438 return self.expression( 6439 exp.PrimaryKeyColumnConstraint, 6440 desc=desc, 6441 options=self._parse_key_constraint_options(), 6442 ) 6443 6444 expressions = self._parse_wrapped_csv( 6445 self._parse_primary_key_part, optional=wrapped_optional 6446 ) 6447 6448 return self.expression( 6449 exp.PrimaryKey, 6450 expressions=expressions, 6451 include=self._parse_index_params(), 6452 options=self._parse_key_constraint_options(), 6453 ) 6454 6455 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6456 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6457 6458 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6459 """ 6460 Parses a datetime column in ODBC format. We parse the column into the corresponding 6461 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6462 same as we did for `DATE('yyyy-mm-dd')`. 
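The other ODBC prefixes are handled the same way through `ODBC_DATETIME_LITERALS`, e.g. `{t'hh:mm:ss'}` for a time literal and `{ts'yyyy-mm-dd hh:mm:ss'}` for a timestamp literal.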
6463 6464 Reference: 6465 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6466 """ 6467 self._match(TokenType.VAR) 6468 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6469 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6470 if not self._match(TokenType.R_BRACE): 6471 self.raise_error("Expected }") 6472 return expression 6473 6474 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6475 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6476 return this 6477 6478 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6479 map_token = seq_get(self._tokens, self._index - 2) 6480 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6481 else: 6482 parse_map = False 6483 6484 bracket_kind = self._prev.token_type 6485 if ( 6486 bracket_kind == TokenType.L_BRACE 6487 and self._curr 6488 and self._curr.token_type == TokenType.VAR 6489 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6490 ): 6491 return self._parse_odbc_datetime_literal() 6492 6493 expressions = self._parse_csv( 6494 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6495 ) 6496 6497 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6498 self.raise_error("Expected ]") 6499 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6500 self.raise_error("Expected }") 6501 6502 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6503 if bracket_kind == TokenType.L_BRACE: 6504 this = self.expression( 6505 exp.Struct, 6506 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6507 ) 6508 elif not this: 6509 this = build_array_constructor( 6510 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6511 ) 6512 else: 6513 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6514 if constructor_type: 6515 return build_array_constructor( 6516 constructor_type, 6517 args=expressions, 6518 bracket_kind=bracket_kind, 6519 dialect=self.dialect, 6520 ) 6521 6522 expressions = apply_index_offset( 6523 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6524 ) 6525 this = self.expression( 6526 exp.Bracket, 6527 this=this, 6528 expressions=expressions, 6529 comments=this.pop_comments(), 6530 ) 6531 6532 self._add_comments(this) 6533 return self._parse_bracket(this) 6534 6535 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6536 if self._match(TokenType.COLON): 6537 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6538 return this 6539 6540 def _parse_case(self) -> t.Optional[exp.Expression]: 6541 if self._match(TokenType.DOT, advance=False): 6542 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6543 self._retreat(self._index - 1) 6544 return None 6545 6546 ifs = [] 6547 default = None 6548 6549 comments = self._prev_comments 6550 expression = self._parse_assignment() 6551 6552 while self._match(TokenType.WHEN): 6553 this = self._parse_assignment() 6554 self._match(TokenType.THEN) 6555 then = self._parse_assignment() 6556 ifs.append(self.expression(exp.If, this=this, true=then)) 6557 6558 if self._match(TokenType.ELSE): 6559 default = self._parse_assignment() 6560 6561 if not self._match(TokenType.END): 6562 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6563 default 
= exp.column("interval") 6564 else: 6565 self.raise_error("Expected END after CASE", self._prev) 6566 6567 return self.expression( 6568 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6569 ) 6570 6571 def _parse_if(self) -> t.Optional[exp.Expression]: 6572 if self._match(TokenType.L_PAREN): 6573 args = self._parse_csv( 6574 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6575 ) 6576 this = self.validate_expression(exp.If.from_arg_list(args), args) 6577 self._match_r_paren() 6578 else: 6579 index = self._index - 1 6580 6581 if self.NO_PAREN_IF_COMMANDS and index == 0: 6582 return self._parse_as_command(self._prev) 6583 6584 condition = self._parse_assignment() 6585 6586 if not condition: 6587 self._retreat(index) 6588 return None 6589 6590 self._match(TokenType.THEN) 6591 true = self._parse_assignment() 6592 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6593 self._match(TokenType.END) 6594 this = self.expression(exp.If, this=condition, true=true, false=false) 6595 6596 return this 6597 6598 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6599 if not self._match_text_seq("VALUE", "FOR"): 6600 self._retreat(self._index - 1) 6601 return None 6602 6603 return self.expression( 6604 exp.NextValueFor, 6605 this=self._parse_column(), 6606 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6607 ) 6608 6609 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6610 this = self._parse_function() or self._parse_var_or_string(upper=True) 6611 6612 if self._match(TokenType.FROM): 6613 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6614 6615 if not self._match(TokenType.COMMA): 6616 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6617 6618 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6619 6620 def _parse_gap_fill(self) -> exp.GapFill: 6621 self._match(TokenType.TABLE) 6622 this = self._parse_table() 6623 6624 self._match(TokenType.COMMA) 6625 args = [this, *self._parse_csv(self._parse_lambda)] 6626 6627 gap_fill = exp.GapFill.from_arg_list(args) 6628 return self.validate_expression(gap_fill, args) 6629 6630 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6631 this = self._parse_assignment() 6632 6633 if not self._match(TokenType.ALIAS): 6634 if self._match(TokenType.COMMA): 6635 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6636 6637 self.raise_error("Expected AS after CAST") 6638 6639 fmt = None 6640 to = self._parse_types() 6641 6642 default = self._match(TokenType.DEFAULT) 6643 if default: 6644 default = self._parse_bitwise() 6645 self._match_text_seq("ON", "CONVERSION", "ERROR") 6646 6647 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6648 fmt_string = self._parse_string() 6649 fmt = self._parse_at_time_zone(fmt_string) 6650 6651 if not to: 6652 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6653 if to.this in exp.DataType.TEMPORAL_TYPES: 6654 this = self.expression( 6655 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6656 this=this, 6657 format=exp.Literal.string( 6658 format_time( 6659 fmt_string.this if fmt_string else "", 6660 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6661 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6662 ) 6663 ), 6664 safe=safe, 6665 ) 6666 6667 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6668 this.set("zone", 
fmt.args["zone"]) 6669 return this 6670 elif not to: 6671 self.raise_error("Expected TYPE after CAST") 6672 elif isinstance(to, exp.Identifier): 6673 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6674 elif to.this == exp.DataType.Type.CHAR: 6675 if self._match(TokenType.CHARACTER_SET): 6676 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6677 6678 return self.build_cast( 6679 strict=strict, 6680 this=this, 6681 to=to, 6682 format=fmt, 6683 safe=safe, 6684 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6685 default=default, 6686 ) 6687 6688 def _parse_string_agg(self) -> exp.GroupConcat: 6689 if self._match(TokenType.DISTINCT): 6690 args: t.List[t.Optional[exp.Expression]] = [ 6691 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6692 ] 6693 if self._match(TokenType.COMMA): 6694 args.extend(self._parse_csv(self._parse_assignment)) 6695 else: 6696 args = self._parse_csv(self._parse_assignment) # type: ignore 6697 6698 if self._match_text_seq("ON", "OVERFLOW"): 6699 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6700 if self._match_text_seq("ERROR"): 6701 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6702 else: 6703 self._match_text_seq("TRUNCATE") 6704 on_overflow = self.expression( 6705 exp.OverflowTruncateBehavior, 6706 this=self._parse_string(), 6707 with_count=( 6708 self._match_text_seq("WITH", "COUNT") 6709 or not self._match_text_seq("WITHOUT", "COUNT") 6710 ), 6711 ) 6712 else: 6713 on_overflow = None 6714 6715 index = self._index 6716 if not self._match(TokenType.R_PAREN) and args: 6717 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6718 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6719 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6720 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6721 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6722 6723 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6724 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6725 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6726 if not self._match_text_seq("WITHIN", "GROUP"): 6727 self._retreat(index) 6728 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6729 6730 # The corresponding match_r_paren will be called in parse_function (caller) 6731 self._match_l_paren() 6732 6733 return self.expression( 6734 exp.GroupConcat, 6735 this=self._parse_order(this=seq_get(args, 0)), 6736 separator=seq_get(args, 1), 6737 on_overflow=on_overflow, 6738 ) 6739 6740 def _parse_convert( 6741 self, strict: bool, safe: t.Optional[bool] = None 6742 ) -> t.Optional[exp.Expression]: 6743 this = self._parse_bitwise() 6744 6745 if self._match(TokenType.USING): 6746 to: t.Optional[exp.Expression] = self.expression( 6747 exp.CharacterSet, this=self._parse_var() 6748 ) 6749 elif self._match(TokenType.COMMA): 6750 to = self._parse_types() 6751 else: 6752 to = None 6753 6754 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6755 6756 def _parse_xml_table(self) -> exp.XMLTable: 6757 namespaces = None 6758 passing = None 6759 columns = None 6760 6761 if self._match_text_seq("XMLNAMESPACES", "("): 6762 namespaces = self._parse_xml_namespace() 6763 self._match_text_seq(")", ",") 6764 6765 this = self._parse_string() 6766 6767 if self._match_text_seq("PASSING"): 6768 # The BY VALUE keywords are optional and are provided for semantic clarity 6769 self._match_text_seq("BY", "VALUE") 6770 passing = self._parse_csv(self._parse_column) 6771 6772 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6773 6774 if self._match_text_seq("COLUMNS"): 6775 columns = self._parse_csv(self._parse_field_def) 6776 6777 return self.expression( 6778 exp.XMLTable, 6779 this=this, 6780 namespaces=namespaces, 6781 passing=passing, 6782 columns=columns, 6783 by_ref=by_ref, 6784 ) 6785 6786 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6787 namespaces = [] 6788 6789 while True: 6790 if self._match(TokenType.DEFAULT): 6791 uri = self._parse_string() 6792 else: 6793 uri = self._parse_alias(self._parse_string()) 6794 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6795 if not self._match(TokenType.COMMA): 6796 break 6797 6798 return namespaces 6799 6800 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6801 args = self._parse_csv(self._parse_assignment) 6802 6803 if len(args) < 3: 6804 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6805 6806 return self.expression(exp.DecodeCase, expressions=args) 6807 6808 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6809 self._match_text_seq("KEY") 6810 key = self._parse_column() 6811 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6812 self._match_text_seq("VALUE") 6813 value = self._parse_bitwise() 6814 6815 if not key and not value: 6816 return None 6817 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6818 6819 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6820 if not this or not self._match_text_seq("FORMAT", "JSON"): 6821 return this 6822 6823 return self.expression(exp.FormatJson, this=this) 6824 6825 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6826 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6827        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6828            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6829            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6830        else: 6831            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6832            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6833 6834        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6835 6836        if not empty and not error and not null: 6837            return None 6838 6839        return self.expression( 6840            exp.OnCondition, 6841            empty=empty, 6842            error=error, 6843            null=null, 6844        ) 6845 6846    def _parse_on_handling( 6847        self, on: str, *values: str 6848    ) -> t.Optional[str] | t.Optional[exp.Expression]: 6849        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6850        for value in values: 6851            if self._match_text_seq(value, "ON", on): 6852                return f"{value} ON {on}" 6853 6854        index = self._index 6855        if self._match(TokenType.DEFAULT): 6856            default_value = self._parse_bitwise() 6857            if self._match_text_seq("ON", on): 6858                return default_value 6859 6860        self._retreat(index) 6861 6862        return None 6863 6864    @t.overload 6865    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6866 6867    @t.overload 6868    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6869 6870    def _parse_json_object(self, agg=False): 6871        star = self._parse_star() 6872        expressions = ( 6873            [star] 6874            if star 6875            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6876        ) 6877        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6878 6879        unique_keys = None 6880        if self._match_text_seq("WITH", "UNIQUE"): 6881            unique_keys = True 6882        elif self._match_text_seq("WITHOUT", "UNIQUE"): 6883            unique_keys = False 6884 6885        self._match_text_seq("KEYS") 6886 6887        return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6888            self._parse_type() 6889        ) 6890        encoding = self._match_text_seq("ENCODING") and self._parse_var() 6891 6892        return self.expression( 6893            exp.JSONObjectAgg if agg else exp.JSONObject, 6894            expressions=expressions, 6895            null_handling=null_handling, 6896            unique_keys=unique_keys, 6897            return_type=return_type, 6898            encoding=encoding, 6899        ) 6900 6901    # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6902    def _parse_json_column_def(self) -> exp.JSONColumnDef: 6903        if not self._match_text_seq("NESTED"): 6904            this = self._parse_id_var() 6905            kind = self._parse_types(allow_identifiers=False) 6906            nested = None 6907        else: 6908            this = None 6909            kind = None 6910            nested = True 6911 6912        path = self._match_text_seq("PATH") and self._parse_string() 6913        nested_schema = nested and self._parse_json_schema() 6914 6915        return self.expression( 6916            exp.JSONColumnDef, 6917            this=this, 6918            kind=kind, 6919            path=path, 6920            nested_schema=nested_schema, 6921        ) 6922 6923    def _parse_json_schema(self) -> exp.JSONSchema: 6924        self._match_text_seq("COLUMNS") 6925        return self.expression( 6926            exp.JSONSchema, 6927            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6928        ) 6929 6930    def _parse_json_table(self) -> exp.JSONTable: 6931        this = self._parse_format_json(self._parse_bitwise()) 6932        path = self._match(TokenType.COMMA) and self._parse_string() 6933        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6934        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6935        schema = 
self._parse_json_schema() 6936 6937 return exp.JSONTable( 6938 this=this, 6939 schema=schema, 6940 path=path, 6941 error_handling=error_handling, 6942 empty_handling=empty_handling, 6943 ) 6944 6945 def _parse_match_against(self) -> exp.MatchAgainst: 6946 if self._match_text_seq("TABLE"): 6947 # parse SingleStore MATCH(TABLE ...) syntax 6948 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6949 expressions = [] 6950 table = self._parse_table() 6951 if table: 6952 expressions = [table] 6953 else: 6954 expressions = self._parse_csv(self._parse_column) 6955 6956 self._match_text_seq(")", "AGAINST", "(") 6957 6958 this = self._parse_string() 6959 6960 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6961 modifier = "IN NATURAL LANGUAGE MODE" 6962 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6963 modifier = f"{modifier} WITH QUERY EXPANSION" 6964 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6965 modifier = "IN BOOLEAN MODE" 6966 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6967 modifier = "WITH QUERY EXPANSION" 6968 else: 6969 modifier = None 6970 6971 return self.expression( 6972 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6973 ) 6974 6975 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6976 def _parse_open_json(self) -> exp.OpenJSON: 6977 this = self._parse_bitwise() 6978 path = self._match(TokenType.COMMA) and self._parse_string() 6979 6980 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6981 this = self._parse_field(any_token=True) 6982 kind = self._parse_types() 6983 path = self._parse_string() 6984 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6985 6986 return self.expression( 6987 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6988 ) 6989 6990 expressions = None 6991 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6992 self._match_l_paren() 6993 expressions = self._parse_csv(_parse_open_json_column_def) 6994 6995 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6996 6997 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6998 args = self._parse_csv(self._parse_bitwise) 6999 7000 if self._match(TokenType.IN): 7001 return self.expression( 7002 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7003 ) 7004 7005 if haystack_first: 7006 haystack = seq_get(args, 0) 7007 needle = seq_get(args, 1) 7008 else: 7009 haystack = seq_get(args, 1) 7010 needle = seq_get(args, 0) 7011 7012 return self.expression( 7013 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7014 ) 7015 7016 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7017 args = self._parse_csv(self._parse_table) 7018 return exp.JoinHint(this=func_name.upper(), expressions=args) 7019 7020 def _parse_substring(self) -> exp.Substring: 7021 # Postgres supports the form: substring(string [from int] [for int]) 7022 # (despite being undocumented, the reverse order also works) 7023 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7024 7025 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7026 7027 start, length = None, None 7028 7029 while self._curr: 7030 if self._match(TokenType.FROM): 7031 start = self._parse_bitwise() 7032 elif self._match(TokenType.FOR): 7033 if not start: 7034 start = exp.Literal.number(1) 7035 length = self._parse_bitwise() 7036 
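            # --- Example (illustrative, not part of the parser source) ---------
            # The FROM/FOR keywords collapse into ordinary positional arguments
            # on exp.Substring. A minimal sketch, assuming the public sqlglot
            # API (identifiers are hypothetical):
            #
            #     import sqlglot
            #
            #     ast = sqlglot.parse_one("SELECT SUBSTRING(s FROM 2 FOR 3)", read="postgres")
            #     # the canonical form should round-trip as SUBSTRING(s, 2, 3)
            #     print(ast.sql("duckdb"))
            # --------------------------------------------------------------------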
else: 7037 break 7038 7039 if start: 7040 args.append(start) 7041 if length: 7042 args.append(length) 7043 7044 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7045 7046 def _parse_trim(self) -> exp.Trim: 7047 # https://www.w3resource.com/sql/character-functions/trim.php 7048 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7049 7050 position = None 7051 collation = None 7052 expression = None 7053 7054 if self._match_texts(self.TRIM_TYPES): 7055 position = self._prev.text.upper() 7056 7057 this = self._parse_bitwise() 7058 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7059 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7060 expression = self._parse_bitwise() 7061 7062 if invert_order: 7063 this, expression = expression, this 7064 7065 if self._match(TokenType.COLLATE): 7066 collation = self._parse_bitwise() 7067 7068 return self.expression( 7069 exp.Trim, this=this, position=position, expression=expression, collation=collation 7070 ) 7071 7072 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7073 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7074 7075 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7076 return self._parse_window(self._parse_id_var(), alias=True) 7077 7078 def _parse_respect_or_ignore_nulls( 7079 self, this: t.Optional[exp.Expression] 7080 ) -> t.Optional[exp.Expression]: 7081 if self._match_text_seq("IGNORE", "NULLS"): 7082 return self.expression(exp.IgnoreNulls, this=this) 7083 if self._match_text_seq("RESPECT", "NULLS"): 7084 return self.expression(exp.RespectNulls, this=this) 7085 return this 7086 7087 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7088 if self._match(TokenType.HAVING): 7089 self._match_texts(("MAX", "MIN")) 7090 max = self._prev.text.upper() != "MIN" 7091 return self.expression( 7092 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7093 ) 7094 7095 return this 7096 7097 def _parse_window( 7098 self, this: t.Optional[exp.Expression], alias: bool = False 7099 ) -> t.Optional[exp.Expression]: 7100 func = this 7101 comments = func.comments if isinstance(func, exp.Expression) else None 7102 7103 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7104 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7105 if self._match_text_seq("WITHIN", "GROUP"): 7106 order = self._parse_wrapped(self._parse_order) 7107 this = self.expression(exp.WithinGroup, this=this, expression=order) 7108 7109 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7110 self._match(TokenType.WHERE) 7111 this = self.expression( 7112 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7113 ) 7114 self._match_r_paren() 7115 7116 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7117 # Some dialects choose to implement and some do not. 7118 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7119 7120 # There is some code above in _parse_lambda that handles 7121 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7122 7123 # The below changes handle 7124 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
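        # --- Example (illustrative, not part of the parser source) -------------
        # Under the rewrite below, both spellings should normalize to the same
        # tree, an exp.IgnoreNulls wrapping the window function. A minimal
        # sketch, assuming the public sqlglot API (identifiers are hypothetical):
        #
        #     import sqlglot
        #
        #     inside = sqlglot.parse_one(
        #         "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t",
        #         read="snowflake",
        #     )
        #     outside = sqlglot.parse_one(
        #         "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t",
        #         read="snowflake",
        #     )
        #     assert inside.sql("snowflake") == outside.sql("snowflake")
        # ------------------------------------------------------------------------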
7125 7126 # Oracle allows both formats 7127 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7128 # and Snowflake chose to do the same for familiarity 7129 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7130 if isinstance(this, exp.AggFunc): 7131 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7132 7133 if ignore_respect and ignore_respect is not this: 7134 ignore_respect.replace(ignore_respect.this) 7135 this = self.expression(ignore_respect.__class__, this=this) 7136 7137 this = self._parse_respect_or_ignore_nulls(this) 7138 7139 # bigquery select from window x AS (partition by ...) 7140 if alias: 7141 over = None 7142 self._match(TokenType.ALIAS) 7143 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7144 return this 7145 else: 7146 over = self._prev.text.upper() 7147 7148 if comments and isinstance(func, exp.Expression): 7149 func.pop_comments() 7150 7151 if not self._match(TokenType.L_PAREN): 7152 return self.expression( 7153 exp.Window, 7154 comments=comments, 7155 this=this, 7156 alias=self._parse_id_var(False), 7157 over=over, 7158 ) 7159 7160 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7161 7162 first = self._match(TokenType.FIRST) 7163 if self._match_text_seq("LAST"): 7164 first = False 7165 7166 partition, order = self._parse_partition_and_order() 7167 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7168 7169 if kind: 7170 self._match(TokenType.BETWEEN) 7171 start = self._parse_window_spec() 7172 7173 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7174 exclude = ( 7175 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7176 if self._match_text_seq("EXCLUDE") 7177 else None 7178 ) 7179 7180 spec = self.expression( 7181 exp.WindowSpec, 7182 kind=kind, 7183 start=start["value"], 7184 start_side=start["side"], 7185 end=end.get("value"), 7186 end_side=end.get("side"), 7187 exclude=exclude, 7188 ) 7189 else: 7190 spec = None 7191 7192 self._match_r_paren() 7193 7194 window = self.expression( 7195 exp.Window, 7196 comments=comments, 7197 this=this, 7198 partition_by=partition, 7199 order=order, 7200 spec=spec, 7201 alias=window_alias, 7202 over=over, 7203 first=first, 7204 ) 7205 7206 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
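        # --- Example (illustrative, not part of the parser source) -------------
        # For Oracle's KEEP ... OVER ... chains, the recursive call below should
        # yield nested exp.Window nodes, with the OVER window wrapping the KEEP
        # one. A minimal sketch, assuming the public sqlglot API (identifiers
        # are hypothetical):
        #
        #     import sqlglot
        #
        #     ast = sqlglot.parse_one(
        #         "SELECT MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) "
        #         "OVER (PARTITION BY z) FROM t",
        #         read="oracle",
        #     )
        # ------------------------------------------------------------------------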
7207 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7208 return self._parse_window(window, alias=alias) 7209 7210 return window 7211 7212 def _parse_partition_and_order( 7213 self, 7214 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7215 return self._parse_partition_by(), self._parse_order() 7216 7217 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7218 self._match(TokenType.BETWEEN) 7219 7220 return { 7221 "value": ( 7222 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7223 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7224 or self._parse_type() 7225 ), 7226 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7227 } 7228 7229 def _parse_alias( 7230 self, this: t.Optional[exp.Expression], explicit: bool = False 7231 ) -> t.Optional[exp.Expression]: 7232 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7233 # so this section tries to parse the clause version and if it fails, it treats the token 7234 # as an identifier (alias) 7235 if self._can_parse_limit_or_offset(): 7236 return this 7237 7238 any_token = self._match(TokenType.ALIAS) 7239 comments = self._prev_comments or [] 7240 7241 if explicit and not any_token: 7242 return this 7243 7244 if self._match(TokenType.L_PAREN): 7245 aliases = self.expression( 7246 exp.Aliases, 7247 comments=comments, 7248 this=this, 7249 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7250 ) 7251 self._match_r_paren(aliases) 7252 return aliases 7253 7254 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7255 self.STRING_ALIASES and self._parse_string_as_identifier() 7256 ) 7257 7258 if alias: 7259 comments.extend(alias.pop_comments()) 7260 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7261 column = this.this 7262 7263 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7264 if not this.comments and column and column.comments: 7265 this.comments = column.pop_comments() 7266 7267 return this 7268 7269 def _parse_id_var( 7270 self, 7271 any_token: bool = True, 7272 tokens: t.Optional[t.Collection[TokenType]] = None, 7273 ) -> t.Optional[exp.Expression]: 7274 expression = self._parse_identifier() 7275 if not expression and ( 7276 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7277 ): 7278 quoted = self._prev.token_type == TokenType.STRING 7279 expression = self._identifier_expression(quoted=quoted) 7280 7281 return expression 7282 7283 def _parse_string(self) -> t.Optional[exp.Expression]: 7284 if self._match_set(self.STRING_PARSERS): 7285 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7286 return self._parse_placeholder() 7287 7288 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7289 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7290 if output: 7291 output.update_positions(self._prev) 7292 return output 7293 7294 def _parse_number(self) -> t.Optional[exp.Expression]: 7295 if self._match_set(self.NUMERIC_PARSERS): 7296 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7297 return self._parse_placeholder() 7298 7299 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7300 if self._match(TokenType.IDENTIFIER): 7301 return self._identifier_expression(quoted=True) 7302 return self._parse_placeholder() 7303 7304 def _parse_var( 7305 self, 7306 any_token: bool = False, 7307 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7308 upper: bool = False, 7309 ) -> t.Optional[exp.Expression]: 7310 if ( 7311 (any_token and self._advance_any()) 7312 or self._match(TokenType.VAR) 7313 or (self._match_set(tokens) if tokens else False) 7314 ): 7315 return self.expression( 7316 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7317 ) 7318 return self._parse_placeholder() 7319 7320 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7321 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7322 self._advance() 7323 return self._prev 7324 return None 7325 7326 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7327 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7328 7329 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7330 return self._parse_primary() or self._parse_var(any_token=True) 7331 7332 def _parse_null(self) -> t.Optional[exp.Expression]: 7333 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7334 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7335 return self._parse_placeholder() 7336 7337 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7338 if self._match(TokenType.TRUE): 7339 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7340 if self._match(TokenType.FALSE): 7341 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7342 return self._parse_placeholder() 7343 7344 def _parse_star(self) -> t.Optional[exp.Expression]: 7345 if self._match(TokenType.STAR): 7346 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7347 return self._parse_placeholder() 7348 7349 def _parse_parameter(self) -> exp.Parameter: 7350 this = self._parse_identifier() or self._parse_primary_or_var() 7351 return self.expression(exp.Parameter, this=this) 7352 7353 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7354 if self._match_set(self.PLACEHOLDER_PARSERS): 7355 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7356 if placeholder: 7357 return placeholder 7358 self._advance(-1) 7359 return None 7360 7361 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7362 if not self._match_texts(keywords): 7363 return None 7364 if self._match(TokenType.L_PAREN, advance=False): 7365 return self._parse_wrapped_csv(self._parse_expression) 7366 7367 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7368 return [expression] if expression else None 7369 7370 def _parse_csv( 7371 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7372 ) -> t.List[exp.Expression]: 7373 parse_result = parse_method() 7374 items = [parse_result] if parse_result is not None else [] 7375 7376 while self._match(sep): 7377 self._add_comments(parse_result) 7378 parse_result = parse_method() 7379 if parse_result is not None: 7380 items.append(parse_result) 7381 7382 return items 7383 7384 def _parse_tokens( 7385 self, parse_method: t.Callable, expressions: t.Dict 7386 ) -> t.Optional[exp.Expression]: 7387 this = parse_method() 7388 7389 while self._match_set(expressions): 7390 this = self.expression( 7391 expressions[self._prev.token_type], 7392 this=this, 7393 comments=self._prev_comments, 7394 expression=parse_method(), 7395 ) 7396 7397 return this 7398 7399 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7400 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7401 7402 def 
_parse_wrapped_csv( 7403 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7404 ) -> t.List[exp.Expression]: 7405 return self._parse_wrapped( 7406 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7407 ) 7408 7409 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7410 wrapped = self._match(TokenType.L_PAREN) 7411 if not wrapped and not optional: 7412 self.raise_error("Expecting (") 7413 parse_result = parse_method() 7414 if wrapped: 7415 self._match_r_paren() 7416 return parse_result 7417 7418 def _parse_expressions(self) -> t.List[exp.Expression]: 7419 return self._parse_csv(self._parse_expression) 7420 7421 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7422 return ( 7423 self._parse_set_operations( 7424 self._parse_alias(self._parse_assignment(), explicit=True) 7425 if alias 7426 else self._parse_assignment() 7427 ) 7428 or self._parse_select() 7429 ) 7430 7431 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7432 return self._parse_query_modifiers( 7433 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7434 ) 7435 7436 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7437 this = None 7438 if self._match_texts(self.TRANSACTION_KIND): 7439 this = self._prev.text 7440 7441 self._match_texts(("TRANSACTION", "WORK")) 7442 7443 modes = [] 7444 while True: 7445 mode = [] 7446 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7447 mode.append(self._prev.text) 7448 7449 if mode: 7450 modes.append(" ".join(mode)) 7451 if not self._match(TokenType.COMMA): 7452 break 7453 7454 return self.expression(exp.Transaction, this=this, modes=modes) 7455 7456 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7457 chain = None 7458 savepoint = None 7459 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7460 7461 self._match_texts(("TRANSACTION", "WORK")) 7462 7463 if self._match_text_seq("TO"): 7464 self._match_text_seq("SAVEPOINT") 7465 savepoint = self._parse_id_var() 7466 7467 if self._match(TokenType.AND): 7468 chain = not self._match_text_seq("NO") 7469 self._match_text_seq("CHAIN") 7470 7471 if is_rollback: 7472 return self.expression(exp.Rollback, savepoint=savepoint) 7473 7474 return self.expression(exp.Commit, chain=chain) 7475 7476 def _parse_refresh(self) -> exp.Refresh: 7477 self._match(TokenType.TABLE) 7478 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7479 7480 def _parse_column_def_with_exists(self): 7481 start = self._index 7482 self._match(TokenType.COLUMN) 7483 7484 exists_column = self._parse_exists(not_=True) 7485 expression = self._parse_field_def() 7486 7487 if not isinstance(expression, exp.ColumnDef): 7488 self._retreat(start) 7489 return None 7490 7491 expression.set("exists", exists_column) 7492 7493 return expression 7494 7495 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7496 if not self._prev.text.upper() == "ADD": 7497 return None 7498 7499 expression = self._parse_column_def_with_exists() 7500 if not expression: 7501 return None 7502 7503 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7504 if self._match_texts(("FIRST", "AFTER")): 7505 position = self._prev.text 7506 column_position = self.expression( 7507 exp.ColumnPosition, this=self._parse_column(), position=position 7508 ) 7509 expression.set("position", column_position) 7510 7511 return 
expression 7512 7513 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7514 drop = self._match(TokenType.DROP) and self._parse_drop() 7515 if drop and not isinstance(drop, exp.Command): 7516 drop.set("kind", drop.args.get("kind", "COLUMN")) 7517 return drop 7518 7519 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7520 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7521 return self.expression( 7522 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7523 ) 7524 7525 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7526 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7527 self._match_text_seq("ADD") 7528 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7529 return self.expression( 7530 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7531 ) 7532 7533 column_def = self._parse_add_column() 7534 if isinstance(column_def, exp.ColumnDef): 7535 return column_def 7536 7537 exists = self._parse_exists(not_=True) 7538 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7539 return self.expression( 7540 exp.AddPartition, 7541 exists=exists, 7542 this=self._parse_field(any_token=True), 7543 location=self._match_text_seq("LOCATION", advance=False) 7544 and self._parse_property(), 7545 ) 7546 7547 return None 7548 7549 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7550 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7551 or self._match_text_seq("COLUMNS") 7552 ): 7553 schema = self._parse_schema() 7554 7555 return ( 7556 ensure_list(schema) 7557 if schema 7558 else self._parse_csv(self._parse_column_def_with_exists) 7559 ) 7560 7561 return self._parse_csv(_parse_add_alteration) 7562 7563 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7564 if self._match_texts(self.ALTER_ALTER_PARSERS): 7565 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7566 7567 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7568 # keyword after ALTER we default to parsing this statement 7569 self._match(TokenType.COLUMN) 7570 column = self._parse_field(any_token=True) 7571 7572 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7573 return self.expression(exp.AlterColumn, this=column, drop=True) 7574 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7575 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7576 if self._match(TokenType.COMMENT): 7577 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7578 if self._match_text_seq("DROP", "NOT", "NULL"): 7579 return self.expression( 7580 exp.AlterColumn, 7581 this=column, 7582 drop=True, 7583 allow_null=True, 7584 ) 7585 if self._match_text_seq("SET", "NOT", "NULL"): 7586 return self.expression( 7587 exp.AlterColumn, 7588 this=column, 7589 allow_null=False, 7590 ) 7591 7592 if self._match_text_seq("SET", "VISIBLE"): 7593 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7594 if self._match_text_seq("SET", "INVISIBLE"): 7595 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7596 7597 self._match_text_seq("SET", "DATA") 7598 self._match_text_seq("TYPE") 7599 return self.expression( 7600 exp.AlterColumn, 7601 this=column, 7602 dtype=self._parse_types(), 7603 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7604 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7605 ) 7606 7607 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7608 if self._match_texts(("ALL", "EVEN", "AUTO")): 7609 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7610 7611 self._match_text_seq("KEY", "DISTKEY") 7612 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7613 7614 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7615 if compound: 7616 self._match_text_seq("SORTKEY") 7617 7618 if self._match(TokenType.L_PAREN, advance=False): 7619 return self.expression( 7620 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7621 ) 7622 7623 self._match_texts(("AUTO", "NONE")) 7624 return self.expression( 7625 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7626 ) 7627 7628 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7629 index = self._index - 1 7630 7631 partition_exists = self._parse_exists() 7632 if self._match(TokenType.PARTITION, advance=False): 7633 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7634 7635 self._retreat(index) 7636 return self._parse_csv(self._parse_drop_column) 7637 7638 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7639 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7640 exists = self._parse_exists() 7641 old_column = self._parse_column() 7642 to = self._match_text_seq("TO") 7643 new_column = self._parse_column() 7644 7645 if old_column is None or to is None or new_column is None: 7646 return None 7647 7648 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7649 7650 self._match_text_seq("TO") 7651 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7652 7653 def _parse_alter_table_set(self) -> exp.AlterSet: 7654 alter_set = self.expression(exp.AlterSet) 7655 7656 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7657 "TABLE", "PROPERTIES" 7658 ): 7659 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7660 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7661 alter_set.set("expressions", [self._parse_assignment()]) 7662 elif self._match_texts(("LOGGED", "UNLOGGED")): 7663 alter_set.set("option", exp.var(self._prev.text.upper())) 7664 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7665 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7666 elif self._match_text_seq("LOCATION"): 7667 alter_set.set("location", self._parse_field()) 7668 elif self._match_text_seq("ACCESS", "METHOD"): 7669 alter_set.set("access_method", self._parse_field()) 7670 elif self._match_text_seq("TABLESPACE"): 7671 alter_set.set("tablespace", self._parse_field()) 7672 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7673 alter_set.set("file_format", [self._parse_field()]) 7674 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7675 alter_set.set("file_format", self._parse_wrapped_options()) 7676 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7677 alter_set.set("copy_options", self._parse_wrapped_options()) 7678 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7679 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7680 else: 7681 if self._match_text_seq("SERDE"): 7682 alter_set.set("serde", self._parse_field()) 7683 7684 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7685 alter_set.set("expressions", [properties]) 7686 7687 return alter_set 7688 7689 def _parse_alter_session(self) -> exp.AlterSession: 7690 """Parse ALTER SESSION SET/UNSET statements.""" 7691 if self._match(TokenType.SET): 7692 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7693 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7694 7695 self._match_text_seq("UNSET") 7696 expressions = self._parse_csv( 7697 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7698 ) 7699 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7700 7701 def _parse_alter(self) -> exp.Alter | exp.Command: 7702 start = self._prev 7703 7704 alter_token = self._match_set(self.ALTERABLES) and self._prev 7705 if not alter_token: 7706 return self._parse_as_command(start) 7707 7708 exists = self._parse_exists() 7709 only = self._match_text_seq("ONLY") 7710 7711 if alter_token.token_type == TokenType.SESSION: 7712 this = None 7713 check = None 7714 cluster = None 7715 else: 7716 this = self._parse_table(schema=True) 7717 check = self._match_text_seq("WITH", "CHECK") 7718 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7719 7720 if self._next: 7721 self._advance() 7722 7723 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7724 if parser: 7725 actions = ensure_list(parser(self)) 7726 not_valid = self._match_text_seq("NOT", "VALID") 7727 options = self._parse_csv(self._parse_property) 7728 7729 if not self._curr and actions: 7730 return self.expression( 7731 exp.Alter, 7732 this=this, 7733 kind=alter_token.text.upper(), 7734 exists=exists, 7735 actions=actions, 7736 only=only, 7737 options=options, 7738 cluster=cluster, 7739 not_valid=not_valid, 7740 check=check, 7741 ) 7742 7743 return self._parse_as_command(start) 7744 7745 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7746 start = self._prev 7747 # https://duckdb.org/docs/sql/statements/analyze 7748 if not self._curr: 7749 return self.expression(exp.Analyze) 7750 7751 options = [] 7752 while self._match_texts(self.ANALYZE_STYLES): 7753 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7754 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7755 else: 7756 options.append(self._prev.text.upper()) 7757 7758 this: t.Optional[exp.Expression] = None 7759 inner_expression: t.Optional[exp.Expression] = None 7760 7761 kind = self._curr and self._curr.text.upper() 7762 7763 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7764 this = self._parse_table_parts() 7765 elif self._match_text_seq("TABLES"): 7766 if self._match_set((TokenType.FROM, TokenType.IN)): 7767 kind = f"{kind} {self._prev.text.upper()}" 7768 this = self._parse_table(schema=True, is_db_reference=True) 7769 elif self._match_text_seq("DATABASE"): 7770 this = self._parse_table(schema=True, is_db_reference=True) 7771 elif self._match_text_seq("CLUSTER"): 7772 this = self._parse_table() 7773 # Try matching inner expr keywords before fallback to parse table. 
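        # --- Example (illustrative, not part of the parser source) -------------
        # For instance, Spark's "ANALYZE TABLE t COMPUTE STATISTICS" takes the
        # TABLE branch above and is then completed by the later
        # ANALYZE_EXPRESSION_PARSERS hook for COMPUTE, while Presto's bare
        # "ANALYZE t" falls through to the final else. A minimal sketch,
        # assuming the public sqlglot API (table name is hypothetical):
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     ast = sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS", read="spark")
        #     # this should parse into exp.Analyze rather than a generic exp.Command
        #     assert isinstance(ast, exp.Analyze)
        # ------------------------------------------------------------------------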
7774 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7775 kind = None 7776 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7777 else: 7778 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7779 kind = None 7780 this = self._parse_table_parts() 7781 7782 partition = self._try_parse(self._parse_partition) 7783 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7784 return self._parse_as_command(start) 7785 7786 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7787 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7788 "WITH", "ASYNC", "MODE" 7789 ): 7790 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7791 else: 7792 mode = None 7793 7794 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7795 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7796 7797 properties = self._parse_properties() 7798 return self.expression( 7799 exp.Analyze, 7800 kind=kind, 7801 this=this, 7802 mode=mode, 7803 partition=partition, 7804 properties=properties, 7805 expression=inner_expression, 7806 options=options, 7807 ) 7808 7809 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7810 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7811 this = None 7812 kind = self._prev.text.upper() 7813 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7814 expressions = [] 7815 7816 if not self._match_text_seq("STATISTICS"): 7817 self.raise_error("Expecting token STATISTICS") 7818 7819 if self._match_text_seq("NOSCAN"): 7820 this = "NOSCAN" 7821 elif self._match(TokenType.FOR): 7822 if self._match_text_seq("ALL", "COLUMNS"): 7823 this = "FOR ALL COLUMNS" 7824 if self._match_texts("COLUMNS"): 7825 this = "FOR COLUMNS" 7826 expressions = self._parse_csv(self._parse_column_reference) 7827 elif self._match_text_seq("SAMPLE"): 7828 sample = self._parse_number() 7829 expressions = [ 7830 self.expression( 7831 exp.AnalyzeSample, 7832 sample=sample, 7833 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7834 ) 7835 ] 7836 7837 return self.expression( 7838 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7839 ) 7840 7841 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7842 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7843 kind = None 7844 this = None 7845 expression: t.Optional[exp.Expression] = None 7846 if self._match_text_seq("REF", "UPDATE"): 7847 kind = "REF" 7848 this = "UPDATE" 7849 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7850 this = "UPDATE SET DANGLING TO NULL" 7851 elif self._match_text_seq("STRUCTURE"): 7852 kind = "STRUCTURE" 7853 if self._match_text_seq("CASCADE", "FAST"): 7854 this = "CASCADE FAST" 7855 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7856 ("ONLINE", "OFFLINE") 7857 ): 7858 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7859 expression = self._parse_into() 7860 7861 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7862 7863 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7864 this = self._prev.text.upper() 7865 if self._match_text_seq("COLUMNS"): 7866 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7867 return None 7868 7869 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7870 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7871 if self._match_text_seq("STATISTICS"): 7872 return self.expression(exp.AnalyzeDelete, kind=kind) 7873 return None 7874 7875 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7876 if self._match_text_seq("CHAINED", "ROWS"): 7877 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7878 return None 7879 7880 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7881 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7882 this = self._prev.text.upper() 7883 expression: t.Optional[exp.Expression] = None 7884 expressions = [] 7885 update_options = None 7886 7887 if self._match_text_seq("HISTOGRAM", "ON"): 7888 expressions = self._parse_csv(self._parse_column_reference) 7889 with_expressions = [] 7890 while self._match(TokenType.WITH): 7891 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7892 if self._match_texts(("SYNC", "ASYNC")): 7893 if self._match_text_seq("MODE", advance=False): 7894 with_expressions.append(f"{self._prev.text.upper()} MODE") 7895 self._advance() 7896 else: 7897 buckets = self._parse_number() 7898 if self._match_text_seq("BUCKETS"): 7899 with_expressions.append(f"{buckets} BUCKETS") 7900 if with_expressions: 7901 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7902 7903 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7904 TokenType.UPDATE, advance=False 7905 ): 7906 update_options = self._prev.text.upper() 7907 self._advance() 7908 elif self._match_text_seq("USING", "DATA"): 7909 expression = self.expression(exp.UsingData, this=self._parse_string()) 7910 7911 return self.expression( 7912 exp.AnalyzeHistogram, 7913 this=this, 7914 expressions=expressions, 7915 expression=expression, 7916 update_options=update_options, 7917 ) 7918 7919 def _parse_merge(self) -> exp.Merge: 7920 self._match(TokenType.INTO) 7921 target = self._parse_table() 7922 7923 if target and self._match(TokenType.ALIAS, advance=False): 7924 target.set("alias", self._parse_table_alias()) 7925 7926 self._match(TokenType.USING) 7927 using = self._parse_table() 7928 7929 self._match(TokenType.ON) 7930 on = self._parse_assignment() 7931 7932 return self.expression( 7933 exp.Merge, 7934 this=target, 7935 using=using, 7936 on=on, 7937 whens=self._parse_when_matched(), 7938 returning=self._parse_returning(), 7939 ) 7940 7941 def _parse_when_matched(self) -> exp.Whens: 7942 whens = [] 7943 7944 while self._match(TokenType.WHEN): 7945 matched = not self._match(TokenType.NOT) 7946 self._match_text_seq("MATCHED") 7947 source = ( 7948 False 7949 if self._match_text_seq("BY", "TARGET") 7950 else self._match_text_seq("BY", "SOURCE") 7951 ) 7952 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7953 7954 self._match(TokenType.THEN) 7955 7956 if self._match(TokenType.INSERT): 7957 this = self._parse_star() 7958 if this: 7959 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7960 else: 7961 then = self.expression( 7962 exp.Insert, 7963 this=exp.var("ROW") 7964 if self._match_text_seq("ROW") 7965 else self._parse_value(values=False), 7966 expression=self._match_text_seq("VALUES") and self._parse_value(), 7967 ) 7968 elif self._match(TokenType.UPDATE): 7969 expressions = self._parse_star() 7970 if expressions: 7971 then = self.expression(exp.Update, expressions=expressions) 7972 else: 7973 then = self.expression( 7974 exp.Update, 7975 
expressions=self._match(TokenType.SET) 7976 and self._parse_csv(self._parse_equality), 7977 ) 7978 elif self._match(TokenType.DELETE): 7979 then = self.expression(exp.Var, this=self._prev.text) 7980 else: 7981 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7982 7983 whens.append( 7984 self.expression( 7985 exp.When, 7986 matched=matched, 7987 source=source, 7988 condition=condition, 7989 then=then, 7990 ) 7991 ) 7992 return self.expression(exp.Whens, expressions=whens) 7993 7994 def _parse_show(self) -> t.Optional[exp.Expression]: 7995 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7996 if parser: 7997 return parser(self) 7998 return self._parse_as_command(self._prev) 7999 8000 def _parse_set_item_assignment( 8001 self, kind: t.Optional[str] = None 8002 ) -> t.Optional[exp.Expression]: 8003 index = self._index 8004 8005 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8006 return self._parse_set_transaction(global_=kind == "GLOBAL") 8007 8008 left = self._parse_primary() or self._parse_column() 8009 assignment_delimiter = self._match_texts(("=", "TO")) 8010 8011 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8012 self._retreat(index) 8013 return None 8014 8015 right = self._parse_statement() or self._parse_id_var() 8016 if isinstance(right, (exp.Column, exp.Identifier)): 8017 right = exp.var(right.name) 8018 8019 this = self.expression(exp.EQ, this=left, expression=right) 8020 return self.expression(exp.SetItem, this=this, kind=kind) 8021 8022 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8023 self._match_text_seq("TRANSACTION") 8024 characteristics = self._parse_csv( 8025 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8026 ) 8027 return self.expression( 8028 exp.SetItem, 8029 expressions=characteristics, 8030 kind="TRANSACTION", 8031 **{"global": global_}, # type: ignore 8032 ) 8033 8034 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8035 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8036 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8037 8038 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8039 index = self._index 8040 set_ = self.expression( 8041 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8042 ) 8043 8044 if self._curr: 8045 self._retreat(index) 8046 return self._parse_as_command(self._prev) 8047 8048 return set_ 8049 8050 def _parse_var_from_options( 8051 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8052 ) -> t.Optional[exp.Var]: 8053 start = self._curr 8054 if not start: 8055 return None 8056 8057 option = start.text.upper() 8058 continuations = options.get(option) 8059 8060 index = self._index 8061 self._advance() 8062 for keywords in continuations or []: 8063 if isinstance(keywords, str): 8064 keywords = (keywords,) 8065 8066 if self._match_text_seq(*keywords): 8067 option = f"{option} {' '.join(keywords)}" 8068 break 8069 else: 8070 if continuations or continuations is None: 8071 if raise_unmatched: 8072 self.raise_error(f"Unknown option {option}") 8073 8074 self._retreat(index) 8075 return None 8076 8077 return exp.var(option) 8078 8079 def _parse_as_command(self, start: Token) -> exp.Command: 8080 while self._curr: 8081 self._advance() 8082 text = self._find_sql(start, self._prev) 8083 size = len(start.text) 8084 self._warn_unsupported() 8085 return exp.Command(this=text[:size], 
expression=text[size:]) 8086 8087 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8088 settings = [] 8089 8090 self._match_l_paren() 8091 kind = self._parse_id_var() 8092 8093 if self._match(TokenType.L_PAREN): 8094 while True: 8095 key = self._parse_id_var() 8096 value = self._parse_primary() 8097 if not key and value is None: 8098 break 8099 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8100 self._match(TokenType.R_PAREN) 8101 8102 self._match_r_paren() 8103 8104 return self.expression( 8105 exp.DictProperty, 8106 this=this, 8107 kind=kind.this if kind else None, 8108 settings=settings, 8109 ) 8110 8111 def _parse_dict_range(self, this: str) -> exp.DictRange: 8112 self._match_l_paren() 8113 has_min = self._match_text_seq("MIN") 8114 if has_min: 8115 min = self._parse_var() or self._parse_primary() 8116 self._match_text_seq("MAX") 8117 max = self._parse_var() or self._parse_primary() 8118 else: 8119 max = self._parse_var() or self._parse_primary() 8120 min = exp.Literal.number(0) 8121 self._match_r_paren() 8122 return self.expression(exp.DictRange, this=this, min=min, max=max) 8123 8124 def _parse_comprehension( 8125 self, this: t.Optional[exp.Expression] 8126 ) -> t.Optional[exp.Comprehension]: 8127 index = self._index 8128 expression = self._parse_column() 8129 if not self._match(TokenType.IN): 8130 self._retreat(index - 1) 8131 return None 8132 iterator = self._parse_column() 8133 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8134 return self.expression( 8135 exp.Comprehension, 8136 this=this, 8137 expression=expression, 8138 iterator=iterator, 8139 condition=condition, 8140 ) 8141 8142 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8143 if self._match(TokenType.HEREDOC_STRING): 8144 return self.expression(exp.Heredoc, this=self._prev.text) 8145 8146 if not self._match_text_seq("$"): 8147 return None 8148 8149 tags = ["$"] 8150 tag_text = None 8151 8152 if self._is_connected(): 8153 self._advance() 8154 tags.append(self._prev.text.upper()) 8155 else: 8156 self.raise_error("No closing $ found") 8157 8158 if tags[-1] != "$": 8159 if self._is_connected() and self._match_text_seq("$"): 8160 tag_text = tags[-1] 8161 tags.append("$") 8162 else: 8163 self.raise_error("No closing $ found") 8164 8165 heredoc_start = self._curr 8166 8167 while self._curr: 8168 if self._match_text_seq(*tags, advance=False): 8169 this = self._find_sql(heredoc_start, self._prev) 8170 self._advance(len(tags)) 8171 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8172 8173 self._advance() 8174 8175 self.raise_error(f"No closing {''.join(tags)} found") 8176 return None 8177 8178 def _find_parser( 8179 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8180 ) -> t.Optional[t.Callable]: 8181 if not self._curr: 8182 return None 8183 8184 index = self._index 8185 this = [] 8186 while True: 8187 # The current token might be multiple words 8188 curr = self._curr.text.upper() 8189 key = curr.split(" ") 8190 this.append(curr) 8191 8192 self._advance() 8193 result, trie = in_trie(trie, key) 8194 if result == TrieResult.FAILED: 8195 break 8196 8197 if result == TrieResult.EXISTS: 8198 subparser = parsers[" ".join(this)] 8199 return subparser 8200 8201 self._retreat(index) 8202 return None 8203 8204 def _match(self, token_type, advance=True, expression=None): 8205 if not self._curr: 8206 return None 8207 8208 if self._curr.token_type == token_type: 8209 if advance: 8210 self._advance() 8211 self._add_comments(expression) 8212 return 
True 8213 8214 return None 8215 8216 def _match_set(self, types, advance=True): 8217 if not self._curr: 8218 return None 8219 8220 if self._curr.token_type in types: 8221 if advance: 8222 self._advance() 8223 return True 8224 8225 return None 8226 8227 def _match_pair(self, token_type_a, token_type_b, advance=True): 8228 if not self._curr or not self._next: 8229 return None 8230 8231 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8232 if advance: 8233 self._advance(2) 8234 return True 8235 8236 return None 8237 8238 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8239 if not self._match(TokenType.L_PAREN, expression=expression): 8240 self.raise_error("Expecting (") 8241 8242 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8243 if not self._match(TokenType.R_PAREN, expression=expression): 8244 self.raise_error("Expecting )") 8245 8246 def _match_texts(self, texts, advance=True): 8247 if ( 8248 self._curr 8249 and self._curr.token_type != TokenType.STRING 8250 and self._curr.text.upper() in texts 8251 ): 8252 if advance: 8253 self._advance() 8254 return True 8255 return None 8256 8257 def _match_text_seq(self, *texts, advance=True): 8258 index = self._index 8259 for text in texts: 8260 if ( 8261 self._curr 8262 and self._curr.token_type != TokenType.STRING 8263 and self._curr.text.upper() == text 8264 ): 8265 self._advance() 8266 else: 8267 self._retreat(index) 8268 return None 8269 8270 if not advance: 8271 self._retreat(index) 8272 8273 return True 8274 8275 def _replace_lambda( 8276 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8277 ) -> t.Optional[exp.Expression]: 8278 if not node: 8279 return node 8280 8281 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8282 8283 for column in node.find_all(exp.Column): 8284 typ = lambda_types.get(column.parts[0].name) 8285 if typ is not None: 8286 dot_or_id = column.to_dot() if column.table else column.this 8287 8288 if typ: 8289 dot_or_id = self.expression( 8290 exp.Cast, 8291 this=dot_or_id, 8292 to=typ, 8293 ) 8294 8295 parent = column.parent 8296 8297 while isinstance(parent, exp.Dot): 8298 if not isinstance(parent.parent, exp.Dot): 8299 parent.replace(dot_or_id) 8300 break 8301 parent = parent.parent 8302 else: 8303 if column is node: 8304 node = dot_or_id 8305 else: 8306 column.replace(dot_or_id) 8307 return node 8308 8309 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8310 start = self._prev 8311 8312 # Not to be confused with TRUNCATE(number, decimals) function call 8313 if self._match(TokenType.L_PAREN): 8314 self._retreat(self._index - 2) 8315 return self._parse_function() 8316 8317 # Clickhouse supports TRUNCATE DATABASE as well 8318 is_database = self._match(TokenType.DATABASE) 8319 8320 self._match(TokenType.TABLE) 8321 8322 exists = self._parse_exists(not_=False) 8323 8324 expressions = self._parse_csv( 8325 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8326 ) 8327 8328 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8329 8330 if self._match_text_seq("RESTART", "IDENTITY"): 8331 identity = "RESTART" 8332 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8333 identity = "CONTINUE" 8334 else: 8335 identity = None 8336 8337 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8338 option = self._prev.text 8339 else: 8340 option = None 8341 8342 partition = self._parse_partition() 
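        # --- Example (illustrative, not part of the parser source) -------------
        # Postgres' extended TRUNCATE form keeps all of its options on the node
        # instead of falling back to exp.Command. A minimal sketch, assuming the
        # public sqlglot API (table name is hypothetical):
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     ast = sqlglot.parse_one(
        #         "TRUNCATE TABLE t RESTART IDENTITY CASCADE", read="postgres"
        #     )
        #     assert isinstance(ast, exp.TruncateTable)
        #     # expected args here: identity='RESTART', option='CASCADE'
        # ------------------------------------------------------------------------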
8343 8344 # Fallback case 8345 if self._curr: 8346 return self._parse_as_command(start) 8347 8348 return self.expression( 8349 exp.TruncateTable, 8350 expressions=expressions, 8351 is_database=is_database, 8352 exists=exists, 8353 cluster=cluster, 8354 identity=identity, 8355 option=option, 8356 partition=partition, 8357 ) 8358 8359 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8360 this = self._parse_ordered(self._parse_opclass) 8361 8362 if not self._match(TokenType.WITH): 8363 return this 8364 8365 op = self._parse_var(any_token=True) 8366 8367 return self.expression(exp.WithOperator, this=this, op=op) 8368 8369 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8370 self._match(TokenType.EQ) 8371 self._match(TokenType.L_PAREN) 8372 8373 opts: t.List[t.Optional[exp.Expression]] = [] 8374 option: exp.Expression | None 8375 while self._curr and not self._match(TokenType.R_PAREN): 8376 if self._match_text_seq("FORMAT_NAME", "="): 8377 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8378 option = self._parse_format_name() 8379 else: 8380 option = self._parse_property() 8381 8382 if option is None: 8383 self.raise_error("Unable to parse option") 8384 break 8385 8386 opts.append(option) 8387 8388 return opts 8389 8390 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8391 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8392 8393 options = [] 8394 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8395 option = self._parse_var(any_token=True) 8396 prev = self._prev.text.upper() 8397 8398 # Different dialects might separate options and values by white space, "=" and "AS" 8399 self._match(TokenType.EQ) 8400 self._match(TokenType.ALIAS) 8401 8402 param = self.expression(exp.CopyParameter, this=option) 8403 8404 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8405 TokenType.L_PAREN, advance=False 8406 ): 8407 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8408 param.set("expressions", self._parse_wrapped_options()) 8409 elif prev == "FILE_FORMAT": 8410 # T-SQL's external file format case 8411 param.set("expression", self._parse_field()) 8412 else: 8413 param.set("expression", self._parse_unquoted_field()) 8414 8415 options.append(param) 8416 self._match(sep) 8417 8418 return options 8419 8420 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8421 expr = self.expression(exp.Credentials) 8422 8423 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8424 expr.set("storage", self._parse_field()) 8425 if self._match_text_seq("CREDENTIALS"): 8426 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8427 creds = ( 8428 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8429 ) 8430 expr.set("credentials", creds) 8431 if self._match_text_seq("ENCRYPTION"): 8432 expr.set("encryption", self._parse_wrapped_options()) 8433 if self._match_text_seq("IAM_ROLE"): 8434 expr.set("iam_role", self._parse_field()) 8435 if self._match_text_seq("REGION"): 8436 expr.set("region", self._parse_field()) 8437 8438 return expr 8439 8440 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8441 return self._parse_field() 8442 8443 def _parse_copy(self) -> exp.Copy | exp.Command: 8444 start = self._prev 8445 8446 self._match(TokenType.INTO) 8447 8448 this = ( 8449 self._parse_select(nested=True, parse_subquery_alias=False) 8450 if self._match(TokenType.L_PAREN, advance=False) 8451 else self._parse_table(schema=True) 
8452 ) 8453 8454 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8455 8456 files = self._parse_csv(self._parse_file_location) 8457 if self._match(TokenType.EQ, advance=False): 8458 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8459 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8460 # list via `_parse_wrapped(..)` below. 8461 self._advance(-1) 8462 files = [] 8463 8464 credentials = self._parse_credentials() 8465 8466 self._match_text_seq("WITH") 8467 8468 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8469 8470 # Fallback case 8471 if self._curr: 8472 return self._parse_as_command(start) 8473 8474 return self.expression( 8475 exp.Copy, 8476 this=this, 8477 kind=kind, 8478 credentials=credentials, 8479 files=files, 8480 params=params, 8481 ) 8482 8483 def _parse_normalize(self) -> exp.Normalize: 8484 return self.expression( 8485 exp.Normalize, 8486 this=self._parse_bitwise(), 8487 form=self._match(TokenType.COMMA) and self._parse_var(), 8488 ) 8489 8490 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8491 args = self._parse_csv(lambda: self._parse_lambda()) 8492 8493 this = seq_get(args, 0) 8494 decimals = seq_get(args, 1) 8495 8496 return expr_type( 8497 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8498 ) 8499 8500 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8501 star_token = self._prev 8502 8503 if self._match_text_seq("COLUMNS", "(", advance=False): 8504 this = self._parse_function() 8505 if isinstance(this, exp.Columns): 8506 this.set("unpack", True) 8507 return this 8508 8509 return self.expression( 8510 exp.Star, 8511 **{ # type: ignore 8512 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8513 "replace": self._parse_star_op("REPLACE"), 8514 "rename": self._parse_star_op("RENAME"), 8515 }, 8516 ).update_positions(star_token) 8517 8518 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8519 privilege_parts = [] 8520 8521 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8522 # (end of privilege list) or L_PAREN (start of column list) are met 8523 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8524 privilege_parts.append(self._curr.text.upper()) 8525 self._advance() 8526 8527 this = exp.var(" ".join(privilege_parts)) 8528 expressions = ( 8529 self._parse_wrapped_csv(self._parse_column) 8530 if self._match(TokenType.L_PAREN, advance=False) 8531 else None 8532 ) 8533 8534 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8535 8536 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8537 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8538 principal = self._parse_id_var() 8539 8540 if not principal: 8541 return None 8542 8543 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8544 8545 def _parse_grant_revoke_common( 8546 self, 8547 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8548 privileges = self._parse_csv(self._parse_grant_privilege) 8549 8550 self._match(TokenType.ON) 8551 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8552 8553 # Attempt to parse the securable e.g. 
MySQL allows names 8554 # such as "foo.*", "*.*" which are not easily parseable yet 8555 securable = self._try_parse(self._parse_table_parts) 8556 8557 return privileges, kind, securable 8558 8559 def _parse_grant(self) -> exp.Grant | exp.Command: 8560 start = self._prev 8561 8562 privileges, kind, securable = self._parse_grant_revoke_common() 8563 8564 if not securable or not self._match_text_seq("TO"): 8565 return self._parse_as_command(start) 8566 8567 principals = self._parse_csv(self._parse_grant_principal) 8568 8569 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8570 8571 if self._curr: 8572 return self._parse_as_command(start) 8573 8574 return self.expression( 8575 exp.Grant, 8576 privileges=privileges, 8577 kind=kind, 8578 securable=securable, 8579 principals=principals, 8580 grant_option=grant_option, 8581 ) 8582 8583 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8584 start = self._prev 8585 8586 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8587 8588 privileges, kind, securable = self._parse_grant_revoke_common() 8589 8590 if not securable or not self._match_text_seq("FROM"): 8591 return self._parse_as_command(start) 8592 8593 principals = self._parse_csv(self._parse_grant_principal) 8594 8595 cascade = None 8596 if self._match_texts(("CASCADE", "RESTRICT")): 8597 cascade = self._prev.text.upper() 8598 8599 if self._curr: 8600 return self._parse_as_command(start) 8601 8602 return self.expression( 8603 exp.Revoke, 8604 privileges=privileges, 8605 kind=kind, 8606 securable=securable, 8607 principals=principals, 8608 grant_option=grant_option, 8609 cascade=cascade, 8610 ) 8611 8612 def _parse_overlay(self) -> exp.Overlay: 8613 return self.expression( 8614 exp.Overlay, 8615 **{ # type: ignore 8616 "this": self._parse_bitwise(), 8617 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8618 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8619 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8620 }, 8621 ) 8622 8623 def _parse_format_name(self) -> exp.Property: 8624 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8625 # for FILE_FORMAT = <format_name> 8626 return self.expression( 8627 exp.Property, 8628 this=exp.var("FORMAT_NAME"), 8629 value=self._parse_string() or self._parse_table_parts(), 8630 ) 8631 8632 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8633 args: t.List[exp.Expression] = [] 8634 8635 if self._match(TokenType.DISTINCT): 8636 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8637 self._match(TokenType.COMMA) 8638 8639 args.extend(self._parse_csv(self._parse_assignment)) 8640 8641 return self.expression( 8642 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8643 ) 8644 8645 def _identifier_expression( 8646 self, token: t.Optional[Token] = None, **kwargs: t.Any 8647 ) -> exp.Identifier: 8648 token = token or self._prev 8649 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8650 expression.update_positions(token) 8651 return expression 8652 8653 def _build_pipe_cte( 8654 self, 8655 query: exp.Query, 8656 expressions: t.List[exp.Expression], 8657 alias_cte: t.Optional[exp.TableAlias] = None, 8658 ) -> exp.Select: 8659 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8660 if alias_cte: 8661 new_cte = alias_cte 8662 else: 8663 self._pipe_cte_counter += 1 8664 new_cte = f"__tmp{self._pipe_cte_counter}" 8665 8666 with_ = 
query.args.get("with") 8667 ctes = with_.pop() if with_ else None 8668 8669 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8670 if ctes: 8671 new_select.set("with", ctes) 8672 8673 return new_select.with_(new_cte, as_=query, copy=False) 8674 8675 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8676 select = self._parse_select(consume_pipe=False) 8677 if not select: 8678 return query 8679 8680 return self._build_pipe_cte( 8681 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8682 ) 8683 8684 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8685 limit = self._parse_limit() 8686 offset = self._parse_offset() 8687 if limit: 8688 curr_limit = query.args.get("limit", limit) 8689 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8690 query.limit(limit, copy=False) 8691 if offset: 8692 curr_offset = query.args.get("offset") 8693 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8694 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8695 8696 return query 8697 8698 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8699 this = self._parse_assignment() 8700 if self._match_text_seq("GROUP", "AND", advance=False): 8701 return this 8702 8703 this = self._parse_alias(this) 8704 8705 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8706 return self._parse_ordered(lambda: this) 8707 8708 return this 8709 8710 def _parse_pipe_syntax_aggregate_group_order_by( 8711 self, query: exp.Select, group_by_exists: bool = True 8712 ) -> exp.Select: 8713 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8714 aggregates_or_groups, orders = [], [] 8715 for element in expr: 8716 if isinstance(element, exp.Ordered): 8717 this = element.this 8718 if isinstance(this, exp.Alias): 8719 element.set("this", this.args["alias"]) 8720 orders.append(element) 8721 else: 8722 this = element 8723 aggregates_or_groups.append(this) 8724 8725 if group_by_exists: 8726 query.select(*aggregates_or_groups, copy=False).group_by( 8727 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8728 copy=False, 8729 ) 8730 else: 8731 query.select(*aggregates_or_groups, append=False, copy=False) 8732 8733 if orders: 8734 return query.order_by(*orders, append=False, copy=False) 8735 8736 return query 8737 8738 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8739 self._match_text_seq("AGGREGATE") 8740 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8741 8742 if self._match(TokenType.GROUP_BY) or ( 8743 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8744 ): 8745 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8746 8747 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8748 8749 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8750 first_setop = self.parse_set_operation(this=query) 8751 if not first_setop: 8752 return None 8753 8754 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8755 expr = self._parse_paren() 8756 return expr.assert_is(exp.Subquery).unnest() if expr else None 8757 8758 first_setop.this.pop() 8759 8760 setops = [ 8761 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8762 *self._parse_csv(_parse_and_unwrap_query), 8763 ] 8764 8765 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8766 
with_ = query.args.get("with") 8767 ctes = with_.pop() if with_ else None 8768 8769 if isinstance(first_setop, exp.Union): 8770 query = query.union(*setops, copy=False, **first_setop.args) 8771 elif isinstance(first_setop, exp.Except): 8772 query = query.except_(*setops, copy=False, **first_setop.args) 8773 else: 8774 query = query.intersect(*setops, copy=False, **first_setop.args) 8775 8776 query.set("with", ctes) 8777 8778 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8779 8780 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8781 join = self._parse_join() 8782 if not join: 8783 return None 8784 8785 if isinstance(query, exp.Select): 8786 return query.join(join, copy=False) 8787 8788 return query 8789 8790 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8791 pivots = self._parse_pivots() 8792 if not pivots: 8793 return query 8794 8795 from_ = query.args.get("from") 8796 if from_: 8797 from_.this.set("pivots", pivots) 8798 8799 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8800 8801 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8802 self._match_text_seq("EXTEND") 8803 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8804 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8805 8806 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8807 sample = self._parse_table_sample() 8808 8809 with_ = query.args.get("with") 8810 if with_: 8811 with_.expressions[-1].this.set("sample", sample) 8812 else: 8813 query.set("sample", sample) 8814 8815 return query 8816 8817 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8818 if isinstance(query, exp.Subquery): 8819 query = exp.select("*").from_(query, copy=False) 8820 8821 if not query.args.get("from"): 8822 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8823 8824 while self._match(TokenType.PIPE_GT): 8825 start = self._curr 8826 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8827 if not parser: 8828 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8829 # keywords, making it tricky to disambiguate them without lookahead. The approach 8830 # here is to try and parse a set operation and if that fails, then try to parse a 8831 # join operator. If that fails as well, then the operator is not supported. 
8832 parsed_query = self._parse_pipe_syntax_set_operator(query) 8833 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8834 if not parsed_query: 8835 self._retreat(start) 8836 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8837 break 8838 query = parsed_query 8839 else: 8840 query = parser(self, query) 8841 8842 return query 8843 8844 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8845 vars = self._parse_csv(self._parse_id_var) 8846 if not vars: 8847 return None 8848 8849 return self.expression( 8850 exp.DeclareItem, 8851 this=vars, 8852 kind=self._parse_types(), 8853 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8854 ) 8855 8856 def _parse_declare(self) -> exp.Declare | exp.Command: 8857 start = self._prev 8858 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8859 8860 if not expressions or self._curr: 8861 return self._parse_as_command(start) 8862 8863 return self.expression(exp.Declare, expressions=expressions) 8864 8865 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8866 exp_class = exp.Cast if strict else exp.TryCast 8867 8868 if exp_class == exp.TryCast: 8869 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8870 8871 return self.expression(exp_class, **kwargs) 8872 8873 def _parse_json_value(self) -> exp.JSONValue: 8874 this = self._parse_bitwise() 8875 self._match(TokenType.COMMA) 8876 path = self._parse_bitwise() 8877 8878 returning = self._match(TokenType.RETURNING) and self._parse_type() 8879 8880 return self.expression( 8881 exp.JSONValue, 8882 this=this, 8883 path=self.dialect.to_json_path(path), 8884 returning=returning, 8885 on_condition=self._parse_on_condition(), 8886 ) 8887 8888 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8889 def concat_exprs( 8890 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8891 ) -> exp.Expression: 8892 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8893 concat_exprs = [ 8894 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8895 ] 8896 node.set("expressions", concat_exprs) 8897 return node 8898 if len(exprs) == 1: 8899 return exprs[0] 8900 return self.expression(exp.Concat, expressions=args, safe=True) 8901 8902 args = self._parse_csv(self._parse_lambda) 8903 8904 if args: 8905 order = args[-1] if isinstance(args[-1], exp.Order) else None 8906 8907 if order: 8908 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8909 # remove 'expr' from exp.Order and add it back to args 8910 args[-1] = order.this 8911 order.set("this", concat_exprs(order.this, args)) 8912 8913 this = order or concat_exprs(args[0], args) 8914 else: 8915 this = None 8916 8917 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8918 8919 return self.expression(exp.GroupConcat, this=this, separator=separator)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree (one tree per SQL statement).
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1589 def __init__( 1590 self, 1591 error_level: t.Optional[ErrorLevel] = None, 1592 error_message_context: int = 100, 1593 max_errors: int = 3, 1594 dialect: DialectType = None, 1595 ): 1596 from sqlglot.dialects import Dialect 1597 1598 self.error_level = error_level or ErrorLevel.IMMEDIATE 1599 self.error_message_context = error_message_context 1600 self.max_errors = max_errors 1601 self.dialect = Dialect.get_or_raise(dialect) 1602 self.reset()
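The Parser is usually driven indirectly through sqlglot.parse or sqlglot.parse_one, but it can also be constructed and used directly. A minimal sketch, assuming the Dialect.parser and Dialect.tokenize convenience helpers:

    from sqlglot.dialects.dialect import Dialect
    from sqlglot.errors import ErrorLevel

    dialect = Dialect.get_or_raise("duckdb")
    parser = dialect.parser(error_level=ErrorLevel.RAISE, max_errors=5)

    # Tokenize first, then hand the token stream to the parser
    sql = "SELECT 1 AS x"
    expressions = parser.parse(dialect.tokenize(sql), sql=sql)
    print(expressions[0].sql("duckdb"))  # SELECT 1 AS x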
1615 def parse( 1616 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1617 ) -> t.List[t.Optional[exp.Expression]]: 1618 """ 1619 Parses a list of tokens and returns a list of syntax trees, one tree 1620 per parsed SQL statement. 1621 1622 Args: 1623 raw_tokens: The list of tokens. 1624 sql: The original SQL string, used to produce helpful debug messages. 1625 1626 Returns: 1627 The list of the produced syntax trees. 1628 """ 1629 return self._parse( 1630 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1631 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
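For reference, the module-level sqlglot.parse helper tokenizes its input and delegates to this method, so a multi-statement string yields one tree per statement:

    import sqlglot

    trees = sqlglot.parse("SELECT 1; SELECT 2")
    print([tree.sql() for tree in trees])  # ['SELECT 1', 'SELECT 2']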
1633 def parse_into( 1634 self, 1635 expression_types: exp.IntoType, 1636 raw_tokens: t.List[Token], 1637 sql: t.Optional[str] = None, 1638 ) -> t.List[t.Optional[exp.Expression]]: 1639 """ 1640 Parses a list of tokens into a given Expression type. If a collection of Expression 1641 types is given instead, this method will try to parse the token list into each one 1642 of them, stopping at the first for which the parsing succeeds. 1643 1644 Args: 1645 expression_types: The expression type(s) to try and parse the token list into. 1646 raw_tokens: The list of tokens. 1647 sql: The original SQL string, used to produce helpful debug messages. 1648 1649 Returns: 1650 The target Expression. 1651 """ 1652 errors = [] 1653 for expression_type in ensure_list(expression_types): 1654 parser = self.EXPRESSION_PARSERS.get(expression_type) 1655 if not parser: 1656 raise TypeError(f"No parser registered for {expression_type}") 1657 1658 try: 1659 return self._parse(parser, raw_tokens, sql) 1660 except ParseError as e: 1661 e.errors[0]["into_expression"] = expression_type 1662 errors.append(e) 1663 1664 raise ParseError( 1665 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1666 errors=merge_errors(errors), 1667 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
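The module-level parse_one(..., into=...) entry point ultimately delegates here. For example, exp.Table is one of the target types registered in EXPRESSION_PARSERS, so a dotted name can be parsed straight into a table node:

    import sqlglot
    from sqlglot import exp

    table = sqlglot.parse_one("a.b.c", into=exp.Table)
    print(table.catalog, table.db, table.name)  # a b c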
1707 def check_errors(self) -> None: 1708 """Logs or raises any found errors, depending on the chosen error level setting.""" 1709 if self.error_level == ErrorLevel.WARN: 1710 for error in self.errors: 1711 logger.error(str(error)) 1712 elif self.error_level == ErrorLevel.RAISE and self.errors: 1713 raise ParseError( 1714 concat_messages(self.errors, self.max_errors), 1715 errors=merge_errors(self.errors), 1716 )
Logs or raises any found errors, depending on the chosen error level setting.
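A short sketch of the WARN behavior: errors found while parsing are recorded on parser.errors and logged rather than raised (using a known-invalid query with a missing closing paren):

    from sqlglot.dialects.dialect import Dialect
    from sqlglot.errors import ErrorLevel

    dialect = Dialect.get_or_raise(None)  # default dialect
    parser = dialect.parser(error_level=ErrorLevel.WARN)

    sql = "SELECT foo FROM (SELECT baz FROM t"
    parser.parse(dialect.tokenize(sql), sql=sql)  # errors are logged, not raised
    print(len(parser.errors))  # at least one recorded error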
1718 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1719 """ 1720 Appends an error in the list of recorded errors or raises it, depending on the chosen 1721 error level setting. 1722 """ 1723 token = token or self._curr or self._prev or Token.string("") 1724 start = token.start 1725 end = token.end + 1 1726 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1727 highlight = self.sql[start:end] 1728 end_context = self.sql[end : end + self.error_message_context] 1729 1730 error = ParseError.new( 1731 f"{message}. Line {token.line}, Col: {token.col}.\n" 1732 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1733 description=message, 1734 line=token.line, 1735 col=token.col, 1736 start_context=start_context, 1737 highlight=highlight, 1738 end_context=end_context, 1739 ) 1740 1741 if self.error_level == ErrorLevel.IMMEDIATE: 1742 raise error 1743 1744 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
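Under the default ErrorLevel.IMMEDIATE, the first error surfaces as a ParseError whose errors list carries the structured fields assembled here:

    import sqlglot
    from sqlglot.errors import ParseError

    try:
        sqlglot.parse_one("SELECT foo FROM (SELECT baz FROM t")
    except ParseError as e:
        error = e.errors[0]
        print(error["description"])  # Expecting )
        print(error["line"], error["col"], error["highlight"])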
1746 def expression( 1747 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1748 ) -> E: 1749 """ 1750 Creates a new, validated Expression. 1751 1752 Args: 1753 exp_class: The expression class to instantiate. 1754 comments: An optional list of comments to attach to the expression. 1755 kwargs: The arguments to set for the expression along with their respective values. 1756 1757 Returns: 1758 The target expression. 1759 """ 1760 instance = exp_class(**kwargs) 1761 instance.add_comments(comments) if comments else self._add_comments(instance) 1762 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
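Dialect code calls expression() rather than instantiating Expression subclasses directly, so that pending comments are attached and validation runs. A minimal sketch outside of an active parse:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    print(node.sql())  # a = 1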
1769 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1770 """ 1771 Validates an Expression, making sure that all its mandatory arguments are set. 1772 1773 Args: 1774 expression: The expression to validate. 1775 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1776 1777 Returns: 1778 The validated expression. 1779 """ 1780 if self.error_level != ErrorLevel.IGNORE: 1781 for error_message in expression.error_messages(args): 1782 self.raise_error(error_message) 1783 1784 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
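For instance, exp.Cast declares its `to` argument as mandatory, so validating a Cast built without one reports an error (raised immediately under the default error level):

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    parser = Parser()  # error_level defaults to ErrorLevel.IMMEDIATE
    try:
        parser.validate_expression(exp.Cast(this=exp.column("x")))  # mandatory `to` missing
    except ParseError as e:
        print(e.errors[0]["description"])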
4867 def parse_set_operation( 4868 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4869 ) -> t.Optional[exp.Expression]: 4870 start = self._index 4871 _, side_token, kind_token = self._parse_join_parts() 4872 4873 side = side_token.text if side_token else None 4874 kind = kind_token.text if kind_token else None 4875 4876 if not self._match_set(self.SET_OPERATIONS): 4877 self._retreat(start) 4878 return None 4879 4880 token_type = self._prev.token_type 4881 4882 if token_type == TokenType.UNION: 4883 operation: t.Type[exp.SetOperation] = exp.Union 4884 elif token_type == TokenType.EXCEPT: 4885 operation = exp.Except 4886 else: 4887 operation = exp.Intersect 4888 4889 comments = self._prev.comments 4890 4891 if self._match(TokenType.DISTINCT): 4892 distinct: t.Optional[bool] = True 4893 elif self._match(TokenType.ALL): 4894 distinct = False 4895 else: 4896 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4897 if distinct is None: 4898 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4899 4900 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4901 "STRICT", "CORRESPONDING" 4902 ) 4903 if self._match_text_seq("CORRESPONDING"): 4904 by_name = True 4905 if not side and not kind: 4906 kind = "INNER" 4907 4908 on_column_list = None 4909 if by_name and self._match_texts(("ON", "BY")): 4910 on_column_list = self._parse_wrapped_csv(self._parse_column) 4911 4912 expression = self._parse_select( 4913 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4914 ) 4915 4916 return self.expression( 4917 operation, 4918 comments=comments, 4919 this=this, 4920 distinct=distinct, 4921 by_name=by_name, 4922 expression=expression, 4923 side=side, 4924 kind=kind, 4925 on=on_column_list, 4926 )
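The distinct-by-default handling above is visible from the top-level API; under the default dialect a bare UNION parses with distinct=True:

    import sqlglot

    union = sqlglot.parse_one("SELECT a FROM x UNION SELECT a FROM y")
    print(type(union).__name__)        # Union
    print(union.args.get("distinct"))  # True: bare UNION defaults to DISTINCT here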