sqlglot.dialects.bigquery
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6 7from sqlglot import exp, generator, parser, tokens, transforms 8from sqlglot._typing import E 9from sqlglot.dialects.dialect import ( 10 Dialect, 11 NormalizationStrategy, 12 annotate_with_type_lambda, 13 arg_max_or_min_no_count, 14 binary_from_function, 15 date_add_interval_sql, 16 datestrtodate_sql, 17 build_formatted_time, 18 filter_array_using_unnest, 19 if_sql, 20 inline_array_unless_query, 21 max_or_greatest, 22 min_or_least, 23 no_ilike_sql, 24 build_date_delta_with_interval, 25 regexp_replace_sql, 26 rename_func, 27 sha256_sql, 28 timestrtotime_sql, 29 ts_or_ds_add_cast, 30 unit_to_var, 31 strposition_sql, 32 groupconcat_sql, 33 space_sql, 34) 35from sqlglot.helper import seq_get, split_num_words 36from sqlglot.tokens import TokenType 37from sqlglot.generator import unsupported_args 38 39if t.TYPE_CHECKING: 40 from sqlglot._typing import Lit 41 42 from sqlglot.optimizer.annotate_types import TypeAnnotator 43 44logger = logging.getLogger("sqlglot") 45 46 47JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray] 48 49DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY") 50 51 52def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str: 53 if not expression.find_ancestor(exp.From, exp.Join): 54 return self.values_sql(expression) 55 56 structs = [] 57 alias = expression.args.get("alias") 58 for tup in expression.find_all(exp.Tuple): 59 field_aliases = ( 60 alias.columns 61 if alias and alias.columns 62 else (f"_c{i}" for i in range(len(tup.expressions))) 63 ) 64 expressions = [ 65 exp.PropertyEQ(this=exp.to_identifier(name), expression=fld) 66 for name, fld in zip(field_aliases, tup.expressions) 67 ] 68 structs.append(exp.Struct(expressions=expressions)) 69 70 # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression 71 alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None 72 return self.unnest_sql( 73 exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only) 74 ) 75 76 77def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str: 78 this = expression.this 79 if isinstance(this, exp.Schema): 80 this = f"{self.sql(this, 'this')} <{self.expressions(this)}>" 81 else: 82 this = self.sql(this) 83 return f"RETURNS {this}" 84 85 86def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str: 87 returns = expression.find(exp.ReturnsProperty) 88 if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"): 89 expression.set("kind", "TABLE FUNCTION") 90 91 if isinstance(expression.expression, (exp.Subquery, exp.Literal)): 92 expression.set("expression", expression.expression.this) 93 94 return self.create_sql(expression) 95 96 97# https://issuetracker.google.com/issues/162294746 98# workaround for bigquery bug when grouping by an expression and then ordering 99# WITH x AS (SELECT 1 y) 100# SELECT y + 1 z 101# FROM x 102# GROUP BY x + 1 103# ORDER by z 104def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: 105 if isinstance(expression, exp.Select): 106 group = expression.args.get("group") 107 order = expression.args.get("order") 108 109 if group and order: 110 aliases = { 111 select.this: select.args["alias"] 112 for select in expression.selects 113 if isinstance(select, exp.Alias) 114 } 115 116 for grouped in group.expressions: 117 if 
grouped.is_int: 118 continue 119 alias = aliases.get(grouped) 120 if alias: 121 grouped.replace(exp.column(alias)) 122 123 return expression 124 125 126def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: 127 """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" 128 if isinstance(expression, exp.CTE) and expression.alias_column_names: 129 cte_query = expression.this 130 131 if cte_query.is_star: 132 logger.warning( 133 "Can't push down CTE column names for star queries. Run the query through" 134 " the optimizer or use 'qualify' to expand the star projections first." 135 ) 136 return expression 137 138 column_names = expression.alias_column_names 139 expression.args["alias"].set("columns", None) 140 141 for name, select in zip(column_names, cte_query.selects): 142 to_replace = select 143 144 if isinstance(select, exp.Alias): 145 select = select.this 146 147 # Inner aliases are shadowed by the CTE column names 148 to_replace.replace(exp.alias_(select, name)) 149 150 return expression 151 152 153def _build_parse_timestamp(args: t.List) -> exp.StrToTime: 154 this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)]) 155 this.set("zone", seq_get(args, 2)) 156 return this 157 158 159def _build_timestamp(args: t.List) -> exp.Timestamp: 160 timestamp = exp.Timestamp.from_arg_list(args) 161 timestamp.set("with_tz", True) 162 return timestamp 163 164 165def _build_date(args: t.List) -> exp.Date | exp.DateFromParts: 166 expr_type = exp.DateFromParts if len(args) == 3 else exp.Date 167 return expr_type.from_arg_list(args) 168 169 170def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5: 171 # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation 172 arg = seq_get(args, 0) 173 return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg) 174 175 176def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: 177 return self.sql( 178 exp.Exists( 179 this=exp.select("1") 180 .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) 181 .where(exp.column("_col").eq(expression.right)) 182 ) 183 ) 184 185 186def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: 187 return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) 188 189 190def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: 191 expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)) 192 expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)) 193 unit = unit_to_var(expression) 194 return self.func("DATE_DIFF", expression.this, expression.expression, unit) 195 196 197def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: 198 scale = expression.args.get("scale") 199 timestamp = expression.this 200 201 if scale in (None, exp.UnixToTime.SECONDS): 202 return self.func("TIMESTAMP_SECONDS", timestamp) 203 if scale == exp.UnixToTime.MILLIS: 204 return self.func("TIMESTAMP_MILLIS", timestamp) 205 if scale == exp.UnixToTime.MICROS: 206 return self.func("TIMESTAMP_MICROS", timestamp) 207 208 unix_seconds = exp.cast( 209 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 210 ) 211 return self.func("TIMESTAMP_SECONDS", unix_seconds) 212 213 214def _build_time(args: t.List) -> exp.Func: 215 if len(args) == 1: 216 return 
exp.TsOrDsToTime(this=args[0]) 217 if len(args) == 2: 218 return exp.Time.from_arg_list(args) 219 return exp.TimeFromParts.from_arg_list(args) 220 221 222def _build_datetime(args: t.List) -> exp.Func: 223 if len(args) == 1: 224 return exp.TsOrDsToDatetime.from_arg_list(args) 225 if len(args) == 2: 226 return exp.Datetime.from_arg_list(args) 227 return exp.TimestampFromParts.from_arg_list(args) 228 229 230def _build_regexp_extract( 231 expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None 232) -> t.Callable[[t.List], E]: 233 def _builder(args: t.List) -> E: 234 try: 235 group = re.compile(args[1].name).groups == 1 236 except re.error: 237 group = False 238 239 # Default group is used for the transpilation of REGEXP_EXTRACT_ALL 240 return expr_type( 241 this=seq_get(args, 0), 242 expression=seq_get(args, 1), 243 position=seq_get(args, 2), 244 occurrence=seq_get(args, 3), 245 group=exp.Literal.number(1) if group else default_group, 246 ) 247 248 return _builder 249 250 251def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 252 def _builder(args: t.List, dialect: Dialect) -> E: 253 if len(args) == 1: 254 # The default value for the JSONPath is '$' i.e all of the data 255 args.append(exp.Literal.string("$")) 256 return parser.build_extract_json_with_path(expr_type)(args, dialect) 257 258 return _builder 259 260 261def _str_to_datetime_sql( 262 self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime 263) -> str: 264 this = self.sql(expression, "this") 265 dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP" 266 267 if expression.args.get("safe"): 268 fmt = self.format_time( 269 expression, 270 self.dialect.INVERSE_FORMAT_MAPPING, 271 self.dialect.INVERSE_FORMAT_TRIE, 272 ) 273 return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})" 274 275 fmt = self.format_time(expression) 276 return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone")) 277 278 279def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E: 280 """ 281 Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention: 282 +---------+---------+---------+------------+---------+ 283 | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 284 +---------+---------+---------+------------+---------+ 285 | OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 286 +---------+---------+---------+------------+---------+ 287 """ 288 self._annotate_args(expression) 289 290 this: exp.Expression = expression.this 291 292 self._set_type( 293 expression, 294 exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type, 295 ) 296 return expression 297 298 299@unsupported_args("ins_cost", "del_cost", "sub_cost") 300def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str: 301 max_dist = expression.args.get("max_dist") 302 if max_dist: 303 max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist) 304 305 return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist) 306 307 308def _build_levenshtein(args: t.List) -> exp.Levenshtein: 309 max_dist = seq_get(args, 2) 310 return exp.Levenshtein( 311 this=seq_get(args, 0), 312 expression=seq_get(args, 1), 313 max_dist=max_dist.expression if max_dist else None, 314 ) 315 316 317def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]: 318 def _builder(args: t.List) -> exp.TimeToStr: 319 return exp.TimeToStr( 320 this=expr_type(this=seq_get(args, 1)), 
321 format=seq_get(args, 0), 322 zone=seq_get(args, 2), 323 ) 324 325 return _builder 326 327 328def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous: 329 if len(args) == 3: 330 return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args) 331 332 # Lowercase the operands in case of transpilation, as exp.Contains 333 # is case-sensitive on other dialects 334 this = exp.Lower(this=seq_get(args, 0)) 335 expr = exp.Lower(this=seq_get(args, 1)) 336 337 return exp.Contains(this=this, expression=expr) 338 339 340def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str: 341 name = (expression._meta and expression.meta.get("name")) or expression.sql_name() 342 upper = name.upper() 343 344 dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS 345 346 if dquote_escaping: 347 self._quote_json_path_key_using_brackets = False 348 349 sql = rename_func(upper)(self, expression) 350 351 if dquote_escaping: 352 self._quote_json_path_key_using_brackets = True 353 354 return sql 355 356 357def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat: 358 annotated = self._annotate_by_args(expression, "expressions") 359 360 # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING 361 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat 362 if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN): 363 annotated.type = exp.DataType.Type.VARCHAR 364 365 return annotated 366 367 368def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array: 369 array_args = expression.expressions 370 371 # BigQuery behaves as follows: 372 # 373 # SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t -- foo, STRUCT<STRING> 374 # SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo')) -- foo, ARRAY<STRING> 375 if ( 376 len(array_args) == 1 377 and isinstance(select := array_args[0].unnest(), exp.Select) 378 and (query_type := select.meta.get("query_type")) is not None 379 and query_type.is_type(exp.DataType.Type.STRUCT) 380 and len(query_type.expressions) == 1 381 and isinstance(col_def := query_type.expressions[0], exp.ColumnDef) 382 and (projection_type := col_def.kind) is not None 383 and not projection_type.is_type(exp.DataType.Type.UNKNOWN) 384 ): 385 array_type = exp.DataType( 386 this=exp.DataType.Type.ARRAY, 387 expressions=[projection_type.copy()], 388 nested=True, 389 ) 390 return self._annotate_with_type(expression, array_type) 391 392 return self._annotate_by_args(expression, "expressions", array=True) 393 394 395class BigQuery(Dialect): 396 WEEK_OFFSET = -1 397 UNNEST_COLUMN_ONLY = True 398 SUPPORTS_USER_DEFINED_TYPES = False 399 SUPPORTS_SEMI_ANTI_JOIN = False 400 LOG_BASE_FIRST = False 401 HEX_LOWERCASE = True 402 FORCE_EARLY_ALIAS_REF_EXPANSION = True 403 PRESERVE_ORIGINAL_NAMES = True 404 HEX_STRING_IS_INTEGER_TYPE = True 405 406 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 407 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 408 409 # bigquery udfs are case sensitive 410 NORMALIZE_FUNCTIONS = False 411 412 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 413 TIME_MAPPING = { 414 "%D": "%m/%d/%y", 415 "%E6S": "%S.%f", 416 "%e": "%-d", 417 } 418 419 FORMAT_MAPPING = { 420 "DD": "%d", 421 "MM": "%m", 422 "MON": "%b", 423 "MONTH": "%B", 424 "YYYY": "%Y", 425 "YY": "%y", 426 "HH": "%I", 427 "HH12": "%I", 428 "HH24": "%H", 429 
"MI": "%M", 430 "SS": "%S", 431 "SSSSS": "%f", 432 "TZH": "%z", 433 } 434 435 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 436 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 437 # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix 438 # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column 439 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE", "_TABLE_SUFFIX", "_FILE_NAME"} 440 441 # All set operations require either a DISTINCT or ALL specifier 442 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 443 444 # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types 445 TYPE_TO_EXPRESSIONS = { 446 **Dialect.TYPE_TO_EXPRESSIONS, 447 exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP], 448 } 449 TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP) 450 451 ANNOTATORS = { 452 **Dialect.ANNOTATORS, 453 **{ 454 expr_type: annotate_with_type_lambda(data_type) 455 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 456 for expr_type in expressions 457 }, 458 **{ 459 expr_type: lambda self, e: _annotate_math_functions(self, e) 460 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 461 }, 462 **{ 463 expr_type: lambda self, e: self._annotate_by_args(e, "this") 464 for expr_type in ( 465 exp.Left, 466 exp.Right, 467 exp.Lower, 468 exp.Upper, 469 exp.Pad, 470 exp.Trim, 471 exp.RegexpExtract, 472 exp.RegexpReplace, 473 exp.Repeat, 474 exp.Substring, 475 ) 476 }, 477 exp.Array: _annotate_array, 478 exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 479 exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 480 exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 481 exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 482 exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 483 exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 484 exp.Concat: _annotate_concat, 485 exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 486 exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 487 exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 488 exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 489 exp.JSONExtractScalar: lambda self, e: self._annotate_with_type( 490 e, exp.DataType.Type.VARCHAR 491 ), 492 exp.JSONValueArray: lambda self, e: self._annotate_with_type( 493 e, exp.DataType.build("ARRAY<VARCHAR>") 494 ), 495 exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 496 exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"), 497 exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 498 exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 499 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 500 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 501 exp.TimestampFromParts: lambda self, e: self._annotate_with_type( 502 e, exp.DataType.Type.DATETIME 503 ), 504 exp.Unicode: lambda self, e: 
self._annotate_with_type(e, exp.DataType.Type.BIGINT), 505 } 506 507 def normalize_identifier(self, expression: E) -> E: 508 if ( 509 isinstance(expression, exp.Identifier) 510 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 511 ): 512 parent = expression.parent 513 while isinstance(parent, exp.Dot): 514 parent = parent.parent 515 516 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 517 # by default. The following check uses a heuristic to detect tables based on whether 518 # they are qualified. This should generally be correct, because tables in BigQuery 519 # must be qualified with at least a dataset, unless @@dataset_id is set. 520 case_sensitive = ( 521 isinstance(parent, exp.UserDefinedFunction) 522 or ( 523 isinstance(parent, exp.Table) 524 and parent.db 525 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 526 ) 527 or expression.meta.get("is_table") 528 ) 529 if not case_sensitive: 530 expression.set("this", expression.this.lower()) 531 532 return t.cast(E, expression) 533 534 return super().normalize_identifier(expression) 535 536 class Tokenizer(tokens.Tokenizer): 537 QUOTES = ["'", '"', '"""', "'''"] 538 COMMENTS = ["--", "#", ("/*", "*/")] 539 IDENTIFIERS = ["`"] 540 STRING_ESCAPES = ["\\"] 541 542 HEX_STRINGS = [("0x", ""), ("0X", "")] 543 544 BYTE_STRINGS = [ 545 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 546 ] 547 548 RAW_STRINGS = [ 549 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 550 ] 551 552 NESTED_COMMENTS = False 553 554 KEYWORDS = { 555 **tokens.Tokenizer.KEYWORDS, 556 "ANY TYPE": TokenType.VARIANT, 557 "BEGIN": TokenType.COMMAND, 558 "BEGIN TRANSACTION": TokenType.BEGIN, 559 "BYTEINT": TokenType.INT, 560 "BYTES": TokenType.BINARY, 561 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 562 "DATETIME": TokenType.TIMESTAMP, 563 "DECLARE": TokenType.DECLARE, 564 "ELSEIF": TokenType.COMMAND, 565 "EXCEPTION": TokenType.COMMAND, 566 "EXPORT": TokenType.EXPORT, 567 "FLOAT64": TokenType.DOUBLE, 568 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 569 "MODEL": TokenType.MODEL, 570 "NOT DETERMINISTIC": TokenType.VOLATILE, 571 "RECORD": TokenType.STRUCT, 572 "TIMESTAMP": TokenType.TIMESTAMPTZ, 573 } 574 KEYWORDS.pop("DIV") 575 KEYWORDS.pop("VALUES") 576 KEYWORDS.pop("/*+") 577 578 class Parser(parser.Parser): 579 PREFIXED_PIVOT_COLUMNS = True 580 LOG_DEFAULTS_TO_LN = True 581 SUPPORTS_IMPLICIT_UNNEST = True 582 JOINS_HAVE_EQUAL_PRECEDENCE = True 583 584 # BigQuery does not allow ASC/DESC to be used as an identifier 585 ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC} 586 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 587 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 588 COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - { 589 TokenType.ASC, 590 TokenType.DESC, 591 } 592 UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 593 594 FUNCTIONS = { 595 **parser.Parser.FUNCTIONS, 596 "CONTAINS_SUBSTR": _build_contains_substring, 597 "DATE": _build_date, 598 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 599 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 600 "DATE_TRUNC": lambda args: exp.DateTrunc( 601 unit=seq_get(args, 1), 602 this=seq_get(args, 0), 603 zone=seq_get(args, 2), 604 ), 605 "DATETIME": _build_datetime, 606 "DATETIME_ADD": 
build_date_delta_with_interval(exp.DatetimeAdd), 607 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 608 "DIV": binary_from_function(exp.IntDiv), 609 "EDIT_DISTANCE": _build_levenshtein, 610 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 611 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 612 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 613 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 614 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 615 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 616 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 617 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 618 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 619 "MD5": exp.MD5Digest.from_arg_list, 620 "TO_HEX": _build_to_hex, 621 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 622 [seq_get(args, 1), seq_get(args, 0)] 623 ), 624 "PARSE_TIMESTAMP": _build_parse_timestamp, 625 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 626 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 627 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 628 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 629 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 630 ), 631 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 632 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 633 "SPLIT": lambda args: exp.Split( 634 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 635 this=seq_get(args, 0), 636 expression=seq_get(args, 1) or exp.Literal.string(","), 637 ), 638 "STRPOS": exp.StrPosition.from_arg_list, 639 "TIME": _build_time, 640 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 641 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 642 "TIMESTAMP": _build_timestamp, 643 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 644 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 645 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 646 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 647 ), 648 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 649 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 650 ), 651 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 652 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 653 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 654 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 655 } 656 657 FUNCTION_PARSERS = { 658 **parser.Parser.FUNCTION_PARSERS, 659 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 660 "JSON_ARRAY": lambda self: self.expression( 661 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 662 ), 663 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 664 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 665 } 666 FUNCTION_PARSERS.pop("TRIM") 667 668 NO_PAREN_FUNCTIONS = { 669 **parser.Parser.NO_PAREN_FUNCTIONS, 670 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 671 } 672 673 NESTED_TYPE_TOKENS = { 674 *parser.Parser.NESTED_TYPE_TOKENS, 675 TokenType.TABLE, 676 } 677 678 PROPERTY_PARSERS = { 679 **parser.Parser.PROPERTY_PARSERS, 680 "NOT DETERMINISTIC": lambda self: self.expression( 681 exp.StabilityProperty, 
this=exp.Literal.string("VOLATILE") 682 ), 683 "OPTIONS": lambda self: self._parse_with_property(), 684 } 685 686 CONSTRAINT_PARSERS = { 687 **parser.Parser.CONSTRAINT_PARSERS, 688 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 689 } 690 691 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 692 RANGE_PARSERS.pop(TokenType.OVERLAPS) 693 694 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 695 696 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 697 698 STATEMENT_PARSERS = { 699 **parser.Parser.STATEMENT_PARSERS, 700 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 701 TokenType.END: lambda self: self._parse_as_command(self._prev), 702 TokenType.FOR: lambda self: self._parse_for_in(), 703 TokenType.EXPORT: lambda self: self._parse_export_data(), 704 TokenType.DECLARE: lambda self: self._parse_declare(), 705 } 706 707 BRACKET_OFFSETS = { 708 "OFFSET": (0, False), 709 "ORDINAL": (1, False), 710 "SAFE_OFFSET": (0, True), 711 "SAFE_ORDINAL": (1, True), 712 } 713 714 def _parse_for_in(self) -> exp.ForIn: 715 this = self._parse_range() 716 self._match_text_seq("DO") 717 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 718 719 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 720 this = super()._parse_table_part(schema=schema) or self._parse_number() 721 722 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 723 if isinstance(this, exp.Identifier): 724 table_name = this.name 725 while self._match(TokenType.DASH, advance=False) and self._next: 726 start = self._curr 727 while self._is_connected() and not self._match_set( 728 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 729 ): 730 self._advance() 731 732 if start == self._curr: 733 break 734 735 table_name += self._find_sql(start, self._prev) 736 737 this = exp.Identifier( 738 this=table_name, quoted=this.args.get("quoted") 739 ).update_positions(this) 740 elif isinstance(this, exp.Literal): 741 table_name = this.name 742 743 if self._is_connected() and self._parse_var(any_token=True): 744 table_name += self._prev.text 745 746 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 747 748 return this 749 750 def _parse_table_parts( 751 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 752 ) -> exp.Table: 753 table = super()._parse_table_parts( 754 schema=schema, is_db_reference=is_db_reference, wildcard=True 755 ) 756 757 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 758 if not table.catalog: 759 if table.db: 760 previous_db = table.args["db"] 761 parts = table.db.split(".") 762 if len(parts) == 2 and not table.args["db"].quoted: 763 table.set( 764 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 765 ) 766 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 767 else: 768 previous_this = table.this 769 parts = table.name.split(".") 770 if len(parts) == 2 and not table.this.quoted: 771 table.set( 772 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 773 ) 774 table.set( 775 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 776 ) 777 778 if isinstance(table.this, exp.Identifier) and any("." 
in p.name for p in table.parts): 779 alias = table.this 780 catalog, db, this, *rest = ( 781 exp.to_identifier(p, quoted=True) 782 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 783 ) 784 785 for part in (catalog, db, this): 786 if part: 787 part.update_positions(table.this) 788 789 if rest and this: 790 this = exp.Dot.build([this, *rest]) # type: ignore 791 792 table = exp.Table( 793 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 794 ) 795 table.meta["quoted_table"] = True 796 else: 797 alias = None 798 799 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 800 # dataset, so if the project identifier is omitted we need to fix the ast so that 801 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 802 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 803 # views, because it would seem like the "catalog" part is set, when it'd actually 804 # be the region/dataset. Merging the two identifiers into a single one is done to 805 # avoid producing a 4-part Table reference, which would cause issues in the schema 806 # module, when there are 3-part table names mixed with information schema views. 807 # 808 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 809 table_parts = table.parts 810 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 811 # We need to alias the table here to avoid breaking existing qualified columns. 812 # This is expected to be safe, because if there's an actual alias coming up in 813 # the token stream, it will overwrite this one. If there isn't one, we are only 814 # exposing the name that can be used to reference the view explicitly (a no-op). 815 exp.alias_( 816 table, 817 t.cast(exp.Identifier, alias or table_parts[-1]), 818 table=True, 819 copy=False, 820 ) 821 822 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 823 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 824 line=table_parts[-2].meta.get("line"), 825 col=table_parts[-1].meta.get("col"), 826 start=table_parts[-2].meta.get("start"), 827 end=table_parts[-1].meta.get("end"), 828 ) 829 table.set("this", new_this) 830 table.set("db", seq_get(table_parts, -3)) 831 table.set("catalog", seq_get(table_parts, -4)) 832 833 return table 834 835 def _parse_column(self) -> t.Optional[exp.Expression]: 836 column = super()._parse_column() 837 if isinstance(column, exp.Column): 838 parts = column.parts 839 if any("." in p.name for p in parts): 840 catalog, db, table, this, *rest = ( 841 exp.to_identifier(p, quoted=True) 842 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 843 ) 844 845 if rest and this: 846 this = exp.Dot.build([this, *rest]) # type: ignore 847 848 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 849 column.meta["quoted_column"] = True 850 851 return column 852 853 @t.overload 854 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 855 856 @t.overload 857 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
858 859 def _parse_json_object(self, agg=False): 860 json_object = super()._parse_json_object() 861 array_kv_pair = seq_get(json_object.expressions, 0) 862 863 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 864 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 865 if ( 866 array_kv_pair 867 and isinstance(array_kv_pair.this, exp.Array) 868 and isinstance(array_kv_pair.expression, exp.Array) 869 ): 870 keys = array_kv_pair.this.expressions 871 values = array_kv_pair.expression.expressions 872 873 json_object.set( 874 "expressions", 875 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 876 ) 877 878 return json_object 879 880 def _parse_bracket( 881 self, this: t.Optional[exp.Expression] = None 882 ) -> t.Optional[exp.Expression]: 883 bracket = super()._parse_bracket(this) 884 885 if this is bracket: 886 return bracket 887 888 if isinstance(bracket, exp.Bracket): 889 for expression in bracket.expressions: 890 name = expression.name.upper() 891 892 if name not in self.BRACKET_OFFSETS: 893 break 894 895 offset, safe = self.BRACKET_OFFSETS[name] 896 bracket.set("offset", offset) 897 bracket.set("safe", safe) 898 expression.replace(expression.expressions[0]) 899 900 return bracket 901 902 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 903 unnest = super()._parse_unnest(with_alias=with_alias) 904 905 if not unnest: 906 return None 907 908 unnest_expr = seq_get(unnest.expressions, 0) 909 if unnest_expr: 910 from sqlglot.optimizer.annotate_types import annotate_types 911 912 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 913 914 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 915 # in contrast to other dialects such as DuckDB which flattens only the array by default 916 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 917 array_elem.is_type(exp.DataType.Type.STRUCT) 918 for array_elem in unnest_expr._type.expressions 919 ): 920 unnest.set("explode_array", True) 921 922 return unnest 923 924 def _parse_make_interval(self) -> exp.MakeInterval: 925 expr = exp.MakeInterval() 926 927 for arg_key in expr.arg_types: 928 value = self._parse_lambda() 929 930 if not value: 931 break 932 933 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 934 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 935 if isinstance(value, exp.Kwarg): 936 arg_key = value.this.name 937 938 expr.set(arg_key, value) 939 940 self._match(TokenType.COMMA) 941 942 return expr 943 944 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 945 expr = self.expression( 946 exp.FeaturesAtTime, 947 this=(self._match(TokenType.TABLE) and self._parse_table()) 948 or self._parse_select(nested=True), 949 ) 950 951 while self._match(TokenType.COMMA): 952 arg = self._parse_lambda() 953 954 # Get the LHS of the Kwarg and set the arg to that value, e.g 955 # "num_rows => 1" sets the expr's `num_rows` arg 956 if arg: 957 expr.set(arg.this.name, arg) 958 959 return expr 960 961 def _parse_export_data(self) -> exp.Export: 962 self._match_text_seq("DATA") 963 964 return self.expression( 965 exp.Export, 966 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 967 options=self._parse_properties(), 968 this=self._match_text_seq("AS") and self._parse_select(), 969 ) 970 971 class Generator(generator.Generator): 972 INTERVAL_ALLOWS_PLURAL_FORM = False 973 
JOIN_HINTS = False 974 QUERY_HINTS = False 975 TABLE_HINTS = False 976 LIMIT_FETCH = "LIMIT" 977 RENAME_TABLE_WITH_DB = False 978 NVL2_SUPPORTED = False 979 UNNEST_WITH_ORDINALITY = False 980 COLLATE_IS_FUNC = True 981 LIMIT_ONLY_LITERALS = True 982 SUPPORTS_TABLE_ALIAS_COLUMNS = False 983 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 984 JSON_KEY_VALUE_PAIR_SEP = "," 985 NULL_ORDERING_SUPPORTED = False 986 IGNORE_NULLS_IN_FUNC = True 987 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 988 CAN_IMPLEMENT_ARRAY_ANY = True 989 SUPPORTS_TO_NUMBER = False 990 NAMED_PLACEHOLDER_TOKEN = "@" 991 HEX_FUNC = "TO_HEX" 992 WITH_PROPERTIES_PREFIX = "OPTIONS" 993 SUPPORTS_EXPLODING_PROJECTIONS = False 994 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 995 SUPPORTS_UNIX_SECONDS = True 996 997 TRANSFORMS = { 998 **generator.Generator.TRANSFORMS, 999 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1000 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1001 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1002 exp.Array: inline_array_unless_query, 1003 exp.ArrayContains: _array_contains_sql, 1004 exp.ArrayFilter: filter_array_using_unnest, 1005 exp.ArrayRemove: filter_array_using_unnest, 1006 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1007 exp.CollateProperty: lambda self, e: ( 1008 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1009 if e.args.get("default") 1010 else f"COLLATE {self.sql(e, 'this')}" 1011 ), 1012 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1013 exp.CountIf: rename_func("COUNTIF"), 1014 exp.Create: _create_sql, 1015 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1016 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1017 exp.DateDiff: lambda self, e: self.func( 1018 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1019 ), 1020 exp.DateFromParts: rename_func("DATE"), 1021 exp.DateStrToDate: datestrtodate_sql, 1022 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1023 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1024 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1025 exp.FromTimeZone: lambda self, e: self.func( 1026 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1027 ), 1028 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1029 exp.GroupConcat: lambda self, e: groupconcat_sql( 1030 self, e, func_name="STRING_AGG", within_group=False 1031 ), 1032 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1033 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1034 exp.If: if_sql(false_value="NULL"), 1035 exp.ILike: no_ilike_sql, 1036 exp.IntDiv: rename_func("DIV"), 1037 exp.Int64: rename_func("INT64"), 1038 exp.JSONExtract: _json_extract_sql, 1039 exp.JSONExtractArray: _json_extract_sql, 1040 exp.JSONExtractScalar: _json_extract_sql, 1041 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1042 exp.Levenshtein: _levenshtein_sql, 1043 exp.Max: max_or_greatest, 1044 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1045 exp.MD5Digest: rename_func("MD5"), 1046 exp.Min: min_or_least, 1047 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1048 exp.RegexpExtract: lambda self, e: self.func( 1049 "REGEXP_EXTRACT", 1050 e.this, 1051 e.expression, 1052 e.args.get("position"), 1053 e.args.get("occurrence"), 1054 ), 1055 exp.RegexpExtractAll: lambda self, e: self.func( 1056 "REGEXP_EXTRACT_ALL", e.this, e.expression 1057 ), 1058 exp.RegexpReplace: regexp_replace_sql, 1059 exp.RegexpLike: 
rename_func("REGEXP_CONTAINS"), 1060 exp.ReturnsProperty: _returnsproperty_sql, 1061 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1062 exp.Select: transforms.preprocess( 1063 [ 1064 transforms.explode_projection_to_unnest(), 1065 transforms.unqualify_unnest, 1066 transforms.eliminate_distinct_on, 1067 _alias_ordered_group, 1068 transforms.eliminate_semi_and_anti_joins, 1069 ] 1070 ), 1071 exp.SHA: rename_func("SHA1"), 1072 exp.SHA2: sha256_sql, 1073 exp.Space: space_sql, 1074 exp.StabilityProperty: lambda self, e: ( 1075 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1076 ), 1077 exp.String: rename_func("STRING"), 1078 exp.StrPosition: lambda self, e: ( 1079 strposition_sql( 1080 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1081 ) 1082 ), 1083 exp.StrToDate: _str_to_datetime_sql, 1084 exp.StrToTime: _str_to_datetime_sql, 1085 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1086 exp.TimeFromParts: rename_func("TIME"), 1087 exp.TimestampFromParts: rename_func("DATETIME"), 1088 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1089 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1090 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1091 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1092 exp.TimeStrToTime: timestrtotime_sql, 1093 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1094 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1095 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1096 exp.TsOrDsToTime: rename_func("TIME"), 1097 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1098 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1099 exp.Unhex: rename_func("FROM_HEX"), 1100 exp.UnixDate: rename_func("UNIX_DATE"), 1101 exp.UnixToTime: _unix_to_time_sql, 1102 exp.Uuid: lambda *_: "GENERATE_UUID()", 1103 exp.Values: _derived_table_values_to_unnest, 1104 exp.VariancePop: rename_func("VAR_POP"), 1105 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1106 } 1107 1108 SUPPORTED_JSON_PATH_PARTS = { 1109 exp.JSONPathKey, 1110 exp.JSONPathRoot, 1111 exp.JSONPathSubscript, 1112 } 1113 1114 TYPE_MAPPING = { 1115 **generator.Generator.TYPE_MAPPING, 1116 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1117 exp.DataType.Type.BIGINT: "INT64", 1118 exp.DataType.Type.BINARY: "BYTES", 1119 exp.DataType.Type.BLOB: "BYTES", 1120 exp.DataType.Type.BOOLEAN: "BOOL", 1121 exp.DataType.Type.CHAR: "STRING", 1122 exp.DataType.Type.DECIMAL: "NUMERIC", 1123 exp.DataType.Type.DOUBLE: "FLOAT64", 1124 exp.DataType.Type.FLOAT: "FLOAT64", 1125 exp.DataType.Type.INT: "INT64", 1126 exp.DataType.Type.NCHAR: "STRING", 1127 exp.DataType.Type.NVARCHAR: "STRING", 1128 exp.DataType.Type.SMALLINT: "INT64", 1129 exp.DataType.Type.TEXT: "STRING", 1130 exp.DataType.Type.TIMESTAMP: "DATETIME", 1131 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1132 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1133 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1134 exp.DataType.Type.TINYINT: "INT64", 1135 exp.DataType.Type.ROWVERSION: "BYTES", 1136 exp.DataType.Type.UUID: "STRING", 1137 exp.DataType.Type.VARBINARY: "BYTES", 1138 exp.DataType.Type.VARCHAR: "STRING", 1139 exp.DataType.Type.VARIANT: "ANY TYPE", 1140 } 1141 1142 PROPERTIES_LOCATION = { 1143 **generator.Generator.PROPERTIES_LOCATION, 1144 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1145 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1146 } 1147 1148 # WINDOW comes after QUALIFY 1149 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1150 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1151 
"qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1152 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1153 } 1154 1155 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1156 RESERVED_KEYWORDS = { 1157 "all", 1158 "and", 1159 "any", 1160 "array", 1161 "as", 1162 "asc", 1163 "assert_rows_modified", 1164 "at", 1165 "between", 1166 "by", 1167 "case", 1168 "cast", 1169 "collate", 1170 "contains", 1171 "create", 1172 "cross", 1173 "cube", 1174 "current", 1175 "default", 1176 "define", 1177 "desc", 1178 "distinct", 1179 "else", 1180 "end", 1181 "enum", 1182 "escape", 1183 "except", 1184 "exclude", 1185 "exists", 1186 "extract", 1187 "false", 1188 "fetch", 1189 "following", 1190 "for", 1191 "from", 1192 "full", 1193 "group", 1194 "grouping", 1195 "groups", 1196 "hash", 1197 "having", 1198 "if", 1199 "ignore", 1200 "in", 1201 "inner", 1202 "intersect", 1203 "interval", 1204 "into", 1205 "is", 1206 "join", 1207 "lateral", 1208 "left", 1209 "like", 1210 "limit", 1211 "lookup", 1212 "merge", 1213 "natural", 1214 "new", 1215 "no", 1216 "not", 1217 "null", 1218 "nulls", 1219 "of", 1220 "on", 1221 "or", 1222 "order", 1223 "outer", 1224 "over", 1225 "partition", 1226 "preceding", 1227 "proto", 1228 "qualify", 1229 "range", 1230 "recursive", 1231 "respect", 1232 "right", 1233 "rollup", 1234 "rows", 1235 "select", 1236 "set", 1237 "some", 1238 "struct", 1239 "tablesample", 1240 "then", 1241 "to", 1242 "treat", 1243 "true", 1244 "unbounded", 1245 "union", 1246 "unnest", 1247 "using", 1248 "when", 1249 "where", 1250 "window", 1251 "with", 1252 "within", 1253 } 1254 1255 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1256 unit = expression.unit 1257 unit_sql = unit.name if unit.is_string else self.sql(unit) 1258 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1259 1260 def mod_sql(self, expression: exp.Mod) -> str: 1261 this = expression.this 1262 expr = expression.expression 1263 return self.func( 1264 "MOD", 1265 this.unnest() if isinstance(this, exp.Paren) else this, 1266 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1267 ) 1268 1269 def column_parts(self, expression: exp.Column) -> str: 1270 if expression.meta.get("quoted_column"): 1271 # If a column reference is of the form `dataset.table`.name, we need 1272 # to preserve the quoted table path, otherwise the reference breaks 1273 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1274 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1275 return f"{table_path}.{self.sql(expression, 'this')}" 1276 1277 return super().column_parts(expression) 1278 1279 def table_parts(self, expression: exp.Table) -> str: 1280 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1281 # we need to make sure the correct quoting is used in each case. 
1282 # 1283 # For example, if there is a CTE x that clashes with a schema name, then the former will 1284 # return the table y in that schema, whereas the latter will return the CTE's y column: 1285 # 1286 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1287 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1288 if expression.meta.get("quoted_table"): 1289 table_parts = ".".join(p.name for p in expression.parts) 1290 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1291 1292 return super().table_parts(expression) 1293 1294 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1295 this = expression.this 1296 if isinstance(this, exp.TsOrDsToDatetime): 1297 func_name = "FORMAT_DATETIME" 1298 elif isinstance(this, exp.TsOrDsToTimestamp): 1299 func_name = "FORMAT_TIMESTAMP" 1300 else: 1301 func_name = "FORMAT_DATE" 1302 1303 time_expr = ( 1304 this 1305 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1306 else expression 1307 ) 1308 return self.func( 1309 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1310 ) 1311 1312 def eq_sql(self, expression: exp.EQ) -> str: 1313 # Operands of = cannot be NULL in BigQuery 1314 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1315 if not isinstance(expression.parent, exp.Update): 1316 return "NULL" 1317 1318 return self.binary(expression, "=") 1319 1320 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1321 parent = expression.parent 1322 1323 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1324 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1325 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1326 return self.func( 1327 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1328 ) 1329 1330 return super().attimezone_sql(expression) 1331 1332 def trycast_sql(self, expression: exp.TryCast) -> str: 1333 return self.cast_sql(expression, safe_prefix="SAFE_") 1334 1335 def bracket_sql(self, expression: exp.Bracket) -> str: 1336 this = expression.this 1337 expressions = expression.expressions 1338 1339 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1340 arg = expressions[0] 1341 if arg.type is None: 1342 from sqlglot.optimizer.annotate_types import annotate_types 1343 1344 arg = annotate_types(arg, dialect=self.dialect) 1345 1346 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1347 # BQ doesn't support bracket syntax with string values for structs 1348 return f"{self.sql(this)}.{arg.name}" 1349 1350 expressions_sql = self.expressions(expression, flat=True) 1351 offset = expression.args.get("offset") 1352 1353 if offset == 0: 1354 expressions_sql = f"OFFSET({expressions_sql})" 1355 elif offset == 1: 1356 expressions_sql = f"ORDINAL({expressions_sql})" 1357 elif offset is not None: 1358 self.unsupported(f"Unsupported array offset: {offset}") 1359 1360 if expression.args.get("safe"): 1361 expressions_sql = f"SAFE_{expressions_sql}" 1362 1363 return f"{self.sql(this)}[{expressions_sql}]" 1364 1365 def in_unnest_op(self, expression: exp.Unnest) -> str: 1366 return self.sql(expression) 1367 1368 def version_sql(self, expression: exp.Version) -> str: 1369 if expression.name == "TIMESTAMP": 1370 expression.set("this", "SYSTEM_TIME") 1371 return super().version_sql(expression) 1372 1373 def contains_sql(self, 
expression: exp.Contains) -> str: 1374 this = expression.this 1375 expr = expression.expression 1376 1377 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1378 this = this.this 1379 expr = expr.this 1380 1381 return self.func("CONTAINS_SUBSTR", this, expr) 1382 1383 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1384 this = expression.this 1385 1386 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1387 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1388 # because they aren't literals and so the above syntax is invalid BigQuery. 1389 if isinstance(this, exp.Array): 1390 elem = seq_get(this.expressions, 0) 1391 if not (elem and elem.find(exp.Query)): 1392 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1393 1394 return super().cast_sql(expression, safe_prefix=safe_prefix) 1395 1396 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1397 variables = self.expressions(expression, "this") 1398 default = self.sql(expression, "default") 1399 default = f" DEFAULT {default}" if default else "" 1400 kind = self.sql(expression, "kind") 1401 kind = f" {kind}" if kind else "" 1402 1403 return f"{variables}{kind}{default}"
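Usage note (not part of the module source): a minimal sketch of how this dialect is typically exercised through sqlglot's public API. The SQL strings and the "duckdb" target below are illustrative assumptions for the example; only sqlglot.transpile, sqlglot.parse_one and sqlglot.exp are relied upon.

    import sqlglot
    from sqlglot import exp

    # Read BigQuery SQL and emit it for another engine. TO_HEX(MD5(...)) is parsed
    # into exp.MD5 (see _build_to_hex above), so on engines whose MD5 already
    # returns hex it should come out as a plain MD5 call.
    print(sqlglot.transpile("SELECT TO_HEX(MD5(col))", read="bigquery", write="duckdb")[0])

    # Going the other way, constructs BigQuery lacks are rewritten by the
    # TRANSFORMS map, e.g. ILIKE goes through no_ilike_sql since BigQuery
    # has no ILIKE operator.
    print(sqlglot.transpile("SELECT * FROM t WHERE name ILIKE '%a%'", read="duckdb", write="bigquery")[0])

    # Parsing alone returns the dialect-aware AST; backtick identifiers and
    # pseudo-columns such as _PARTITIONTIME are handled by this dialect.
    tree = sqlglot.parse_one("SELECT _PARTITIONTIME FROM `proj.dataset.tbl`", dialect="bigquery")
    print(repr(tree.find(exp.Table)))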
unravel it here 759 if not table.catalog: 760 if table.db: 761 previous_db = table.args["db"] 762 parts = table.db.split(".") 763 if len(parts) == 2 and not table.args["db"].quoted: 764 table.set( 765 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 766 ) 767 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 768 else: 769 previous_this = table.this 770 parts = table.name.split(".") 771 if len(parts) == 2 and not table.this.quoted: 772 table.set( 773 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 774 ) 775 table.set( 776 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 777 ) 778 779 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 780 alias = table.this 781 catalog, db, this, *rest = ( 782 exp.to_identifier(p, quoted=True) 783 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 784 ) 785 786 for part in (catalog, db, this): 787 if part: 788 part.update_positions(table.this) 789 790 if rest and this: 791 this = exp.Dot.build([this, *rest]) # type: ignore 792 793 table = exp.Table( 794 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 795 ) 796 table.meta["quoted_table"] = True 797 else: 798 alias = None 799 800 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 801 # dataset, so if the project identifier is omitted we need to fix the ast so that 802 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 803 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 804 # views, because it would seem like the "catalog" part is set, when it'd actually 805 # be the region/dataset. Merging the two identifiers into a single one is done to 806 # avoid producing a 4-part Table reference, which would cause issues in the schema 807 # module, when there are 3-part table names mixed with information schema views. 808 # 809 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 810 table_parts = table.parts 811 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 812 # We need to alias the table here to avoid breaking existing qualified columns. 813 # This is expected to be safe, because if there's an actual alias coming up in 814 # the token stream, it will overwrite this one. If there isn't one, we are only 815 # exposing the name that can be used to reference the view explicitly (a no-op). 816 exp.alias_( 817 table, 818 t.cast(exp.Identifier, alias or table_parts[-1]), 819 table=True, 820 copy=False, 821 ) 822 823 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 824 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 825 line=table_parts[-2].meta.get("line"), 826 col=table_parts[-1].meta.get("col"), 827 start=table_parts[-2].meta.get("start"), 828 end=table_parts[-1].meta.get("end"), 829 ) 830 table.set("this", new_this) 831 table.set("db", seq_get(table_parts, -3)) 832 table.set("catalog", seq_get(table_parts, -4)) 833 834 return table 835 836 def _parse_column(self) -> t.Optional[exp.Expression]: 837 column = super()._parse_column() 838 if isinstance(column, exp.Column): 839 parts = column.parts 840 if any("." 
in p.name for p in parts): 841 catalog, db, table, this, *rest = ( 842 exp.to_identifier(p, quoted=True) 843 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 844 ) 845 846 if rest and this: 847 this = exp.Dot.build([this, *rest]) # type: ignore 848 849 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 850 column.meta["quoted_column"] = True 851 852 return column 853 854 @t.overload 855 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 856 857 @t.overload 858 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 859 860 def _parse_json_object(self, agg=False): 861 json_object = super()._parse_json_object() 862 array_kv_pair = seq_get(json_object.expressions, 0) 863 864 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 865 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 866 if ( 867 array_kv_pair 868 and isinstance(array_kv_pair.this, exp.Array) 869 and isinstance(array_kv_pair.expression, exp.Array) 870 ): 871 keys = array_kv_pair.this.expressions 872 values = array_kv_pair.expression.expressions 873 874 json_object.set( 875 "expressions", 876 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 877 ) 878 879 return json_object 880 881 def _parse_bracket( 882 self, this: t.Optional[exp.Expression] = None 883 ) -> t.Optional[exp.Expression]: 884 bracket = super()._parse_bracket(this) 885 886 if this is bracket: 887 return bracket 888 889 if isinstance(bracket, exp.Bracket): 890 for expression in bracket.expressions: 891 name = expression.name.upper() 892 893 if name not in self.BRACKET_OFFSETS: 894 break 895 896 offset, safe = self.BRACKET_OFFSETS[name] 897 bracket.set("offset", offset) 898 bracket.set("safe", safe) 899 expression.replace(expression.expressions[0]) 900 901 return bracket 902 903 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 904 unnest = super()._parse_unnest(with_alias=with_alias) 905 906 if not unnest: 907 return None 908 909 unnest_expr = seq_get(unnest.expressions, 0) 910 if unnest_expr: 911 from sqlglot.optimizer.annotate_types import annotate_types 912 913 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 914 915 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 916 # in contrast to other dialects such as DuckDB which flattens only the array by default 917 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 918 array_elem.is_type(exp.DataType.Type.STRUCT) 919 for array_elem in unnest_expr._type.expressions 920 ): 921 unnest.set("explode_array", True) 922 923 return unnest 924 925 def _parse_make_interval(self) -> exp.MakeInterval: 926 expr = exp.MakeInterval() 927 928 for arg_key in expr.arg_types: 929 value = self._parse_lambda() 930 931 if not value: 932 break 933 934 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 935 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 936 if isinstance(value, exp.Kwarg): 937 arg_key = value.this.name 938 939 expr.set(arg_key, value) 940 941 self._match(TokenType.COMMA) 942 943 return expr 944 945 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 946 expr = self.expression( 947 exp.FeaturesAtTime, 948 this=(self._match(TokenType.TABLE) and self._parse_table()) 949 or self._parse_select(nested=True), 950 ) 951 952 while self._match(TokenType.COMMA): 953 arg = self._parse_lambda() 954 955 # Get the LHS of 
the Kwarg and set the arg to that value, e.g 956 # "num_rows => 1" sets the expr's `num_rows` arg 957 if arg: 958 expr.set(arg.this.name, arg) 959 960 return expr 961 962 def _parse_export_data(self) -> exp.Export: 963 self._match_text_seq("DATA") 964 965 return self.expression( 966 exp.Export, 967 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 968 options=self._parse_properties(), 969 this=self._match_text_seq("AS") and self._parse_select(), 970 ) 971 972 class Generator(generator.Generator): 973 INTERVAL_ALLOWS_PLURAL_FORM = False 974 JOIN_HINTS = False 975 QUERY_HINTS = False 976 TABLE_HINTS = False 977 LIMIT_FETCH = "LIMIT" 978 RENAME_TABLE_WITH_DB = False 979 NVL2_SUPPORTED = False 980 UNNEST_WITH_ORDINALITY = False 981 COLLATE_IS_FUNC = True 982 LIMIT_ONLY_LITERALS = True 983 SUPPORTS_TABLE_ALIAS_COLUMNS = False 984 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 985 JSON_KEY_VALUE_PAIR_SEP = "," 986 NULL_ORDERING_SUPPORTED = False 987 IGNORE_NULLS_IN_FUNC = True 988 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 989 CAN_IMPLEMENT_ARRAY_ANY = True 990 SUPPORTS_TO_NUMBER = False 991 NAMED_PLACEHOLDER_TOKEN = "@" 992 HEX_FUNC = "TO_HEX" 993 WITH_PROPERTIES_PREFIX = "OPTIONS" 994 SUPPORTS_EXPLODING_PROJECTIONS = False 995 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 996 SUPPORTS_UNIX_SECONDS = True 997 998 TRANSFORMS = { 999 **generator.Generator.TRANSFORMS, 1000 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1001 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1002 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1003 exp.Array: inline_array_unless_query, 1004 exp.ArrayContains: _array_contains_sql, 1005 exp.ArrayFilter: filter_array_using_unnest, 1006 exp.ArrayRemove: filter_array_using_unnest, 1007 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1008 exp.CollateProperty: lambda self, e: ( 1009 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1010 if e.args.get("default") 1011 else f"COLLATE {self.sql(e, 'this')}" 1012 ), 1013 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1014 exp.CountIf: rename_func("COUNTIF"), 1015 exp.Create: _create_sql, 1016 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1017 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1018 exp.DateDiff: lambda self, e: self.func( 1019 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1020 ), 1021 exp.DateFromParts: rename_func("DATE"), 1022 exp.DateStrToDate: datestrtodate_sql, 1023 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1024 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1025 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1026 exp.FromTimeZone: lambda self, e: self.func( 1027 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1028 ), 1029 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1030 exp.GroupConcat: lambda self, e: groupconcat_sql( 1031 self, e, func_name="STRING_AGG", within_group=False 1032 ), 1033 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1034 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1035 exp.If: if_sql(false_value="NULL"), 1036 exp.ILike: no_ilike_sql, 1037 exp.IntDiv: rename_func("DIV"), 1038 exp.Int64: rename_func("INT64"), 1039 exp.JSONExtract: _json_extract_sql, 1040 exp.JSONExtractArray: _json_extract_sql, 1041 exp.JSONExtractScalar: _json_extract_sql, 1042 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1043 exp.Levenshtein: _levenshtein_sql, 1044 exp.Max: max_or_greatest, 1045 exp.MD5: 
lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1046 exp.MD5Digest: rename_func("MD5"), 1047 exp.Min: min_or_least, 1048 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1049 exp.RegexpExtract: lambda self, e: self.func( 1050 "REGEXP_EXTRACT", 1051 e.this, 1052 e.expression, 1053 e.args.get("position"), 1054 e.args.get("occurrence"), 1055 ), 1056 exp.RegexpExtractAll: lambda self, e: self.func( 1057 "REGEXP_EXTRACT_ALL", e.this, e.expression 1058 ), 1059 exp.RegexpReplace: regexp_replace_sql, 1060 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1061 exp.ReturnsProperty: _returnsproperty_sql, 1062 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1063 exp.Select: transforms.preprocess( 1064 [ 1065 transforms.explode_projection_to_unnest(), 1066 transforms.unqualify_unnest, 1067 transforms.eliminate_distinct_on, 1068 _alias_ordered_group, 1069 transforms.eliminate_semi_and_anti_joins, 1070 ] 1071 ), 1072 exp.SHA: rename_func("SHA1"), 1073 exp.SHA2: sha256_sql, 1074 exp.Space: space_sql, 1075 exp.StabilityProperty: lambda self, e: ( 1076 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1077 ), 1078 exp.String: rename_func("STRING"), 1079 exp.StrPosition: lambda self, e: ( 1080 strposition_sql( 1081 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1082 ) 1083 ), 1084 exp.StrToDate: _str_to_datetime_sql, 1085 exp.StrToTime: _str_to_datetime_sql, 1086 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1087 exp.TimeFromParts: rename_func("TIME"), 1088 exp.TimestampFromParts: rename_func("DATETIME"), 1089 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1090 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1091 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1092 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1093 exp.TimeStrToTime: timestrtotime_sql, 1094 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1095 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1096 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1097 exp.TsOrDsToTime: rename_func("TIME"), 1098 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1099 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1100 exp.Unhex: rename_func("FROM_HEX"), 1101 exp.UnixDate: rename_func("UNIX_DATE"), 1102 exp.UnixToTime: _unix_to_time_sql, 1103 exp.Uuid: lambda *_: "GENERATE_UUID()", 1104 exp.Values: _derived_table_values_to_unnest, 1105 exp.VariancePop: rename_func("VAR_POP"), 1106 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1107 } 1108 1109 SUPPORTED_JSON_PATH_PARTS = { 1110 exp.JSONPathKey, 1111 exp.JSONPathRoot, 1112 exp.JSONPathSubscript, 1113 } 1114 1115 TYPE_MAPPING = { 1116 **generator.Generator.TYPE_MAPPING, 1117 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1118 exp.DataType.Type.BIGINT: "INT64", 1119 exp.DataType.Type.BINARY: "BYTES", 1120 exp.DataType.Type.BLOB: "BYTES", 1121 exp.DataType.Type.BOOLEAN: "BOOL", 1122 exp.DataType.Type.CHAR: "STRING", 1123 exp.DataType.Type.DECIMAL: "NUMERIC", 1124 exp.DataType.Type.DOUBLE: "FLOAT64", 1125 exp.DataType.Type.FLOAT: "FLOAT64", 1126 exp.DataType.Type.INT: "INT64", 1127 exp.DataType.Type.NCHAR: "STRING", 1128 exp.DataType.Type.NVARCHAR: "STRING", 1129 exp.DataType.Type.SMALLINT: "INT64", 1130 exp.DataType.Type.TEXT: "STRING", 1131 exp.DataType.Type.TIMESTAMP: "DATETIME", 1132 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1133 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1134 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1135 exp.DataType.Type.TINYINT: "INT64", 1136 exp.DataType.Type.ROWVERSION: "BYTES", 1137 
exp.DataType.Type.UUID: "STRING", 1138 exp.DataType.Type.VARBINARY: "BYTES", 1139 exp.DataType.Type.VARCHAR: "STRING", 1140 exp.DataType.Type.VARIANT: "ANY TYPE", 1141 } 1142 1143 PROPERTIES_LOCATION = { 1144 **generator.Generator.PROPERTIES_LOCATION, 1145 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1146 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1147 } 1148 1149 # WINDOW comes after QUALIFY 1150 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1151 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1152 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1153 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1154 } 1155 1156 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1157 RESERVED_KEYWORDS = { 1158 "all", 1159 "and", 1160 "any", 1161 "array", 1162 "as", 1163 "asc", 1164 "assert_rows_modified", 1165 "at", 1166 "between", 1167 "by", 1168 "case", 1169 "cast", 1170 "collate", 1171 "contains", 1172 "create", 1173 "cross", 1174 "cube", 1175 "current", 1176 "default", 1177 "define", 1178 "desc", 1179 "distinct", 1180 "else", 1181 "end", 1182 "enum", 1183 "escape", 1184 "except", 1185 "exclude", 1186 "exists", 1187 "extract", 1188 "false", 1189 "fetch", 1190 "following", 1191 "for", 1192 "from", 1193 "full", 1194 "group", 1195 "grouping", 1196 "groups", 1197 "hash", 1198 "having", 1199 "if", 1200 "ignore", 1201 "in", 1202 "inner", 1203 "intersect", 1204 "interval", 1205 "into", 1206 "is", 1207 "join", 1208 "lateral", 1209 "left", 1210 "like", 1211 "limit", 1212 "lookup", 1213 "merge", 1214 "natural", 1215 "new", 1216 "no", 1217 "not", 1218 "null", 1219 "nulls", 1220 "of", 1221 "on", 1222 "or", 1223 "order", 1224 "outer", 1225 "over", 1226 "partition", 1227 "preceding", 1228 "proto", 1229 "qualify", 1230 "range", 1231 "recursive", 1232 "respect", 1233 "right", 1234 "rollup", 1235 "rows", 1236 "select", 1237 "set", 1238 "some", 1239 "struct", 1240 "tablesample", 1241 "then", 1242 "to", 1243 "treat", 1244 "true", 1245 "unbounded", 1246 "union", 1247 "unnest", 1248 "using", 1249 "when", 1250 "where", 1251 "window", 1252 "with", 1253 "within", 1254 } 1255 1256 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1257 unit = expression.unit 1258 unit_sql = unit.name if unit.is_string else self.sql(unit) 1259 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1260 1261 def mod_sql(self, expression: exp.Mod) -> str: 1262 this = expression.this 1263 expr = expression.expression 1264 return self.func( 1265 "MOD", 1266 this.unnest() if isinstance(this, exp.Paren) else this, 1267 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1268 ) 1269 1270 def column_parts(self, expression: exp.Column) -> str: 1271 if expression.meta.get("quoted_column"): 1272 # If a column reference is of the form `dataset.table`.name, we need 1273 # to preserve the quoted table path, otherwise the reference breaks 1274 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1275 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1276 return f"{table_path}.{self.sql(expression, 'this')}" 1277 1278 return super().column_parts(expression) 1279 1280 def table_parts(self, expression: exp.Table) -> str: 1281 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1282 # we need to make sure the correct quoting is used in each case. 
1283 # 1284 # For example, if there is a CTE x that clashes with a schema name, then the former will 1285 # return the table y in that schema, whereas the latter will return the CTE's y column: 1286 # 1287 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1288 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1289 if expression.meta.get("quoted_table"): 1290 table_parts = ".".join(p.name for p in expression.parts) 1291 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1292 1293 return super().table_parts(expression) 1294 1295 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1296 this = expression.this 1297 if isinstance(this, exp.TsOrDsToDatetime): 1298 func_name = "FORMAT_DATETIME" 1299 elif isinstance(this, exp.TsOrDsToTimestamp): 1300 func_name = "FORMAT_TIMESTAMP" 1301 else: 1302 func_name = "FORMAT_DATE" 1303 1304 time_expr = ( 1305 this 1306 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1307 else expression 1308 ) 1309 return self.func( 1310 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1311 ) 1312 1313 def eq_sql(self, expression: exp.EQ) -> str: 1314 # Operands of = cannot be NULL in BigQuery 1315 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1316 if not isinstance(expression.parent, exp.Update): 1317 return "NULL" 1318 1319 return self.binary(expression, "=") 1320 1321 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1322 parent = expression.parent 1323 1324 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1325 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1326 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1327 return self.func( 1328 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1329 ) 1330 1331 return super().attimezone_sql(expression) 1332 1333 def trycast_sql(self, expression: exp.TryCast) -> str: 1334 return self.cast_sql(expression, safe_prefix="SAFE_") 1335 1336 def bracket_sql(self, expression: exp.Bracket) -> str: 1337 this = expression.this 1338 expressions = expression.expressions 1339 1340 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1341 arg = expressions[0] 1342 if arg.type is None: 1343 from sqlglot.optimizer.annotate_types import annotate_types 1344 1345 arg = annotate_types(arg, dialect=self.dialect) 1346 1347 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1348 # BQ doesn't support bracket syntax with string values for structs 1349 return f"{self.sql(this)}.{arg.name}" 1350 1351 expressions_sql = self.expressions(expression, flat=True) 1352 offset = expression.args.get("offset") 1353 1354 if offset == 0: 1355 expressions_sql = f"OFFSET({expressions_sql})" 1356 elif offset == 1: 1357 expressions_sql = f"ORDINAL({expressions_sql})" 1358 elif offset is not None: 1359 self.unsupported(f"Unsupported array offset: {offset}") 1360 1361 if expression.args.get("safe"): 1362 expressions_sql = f"SAFE_{expressions_sql}" 1363 1364 return f"{self.sql(this)}[{expressions_sql}]" 1365 1366 def in_unnest_op(self, expression: exp.Unnest) -> str: 1367 return self.sql(expression) 1368 1369 def version_sql(self, expression: exp.Version) -> str: 1370 if expression.name == "TIMESTAMP": 1371 expression.set("this", "SYSTEM_TIME") 1372 return super().version_sql(expression) 1373 1374 def contains_sql(self, 
expression: exp.Contains) -> str: 1375 this = expression.this 1376 expr = expression.expression 1377 1378 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1379 this = this.this 1380 expr = expr.this 1381 1382 return self.func("CONTAINS_SUBSTR", this, expr) 1383 1384 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1385 this = expression.this 1386 1387 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1388 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1389 # because they aren't literals and so the above syntax is invalid BigQuery. 1390 if isinstance(this, exp.Array): 1391 elem = seq_get(this.expressions, 0) 1392 if not (elem and elem.find(exp.Query)): 1393 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1394 1395 return super().cast_sql(expression, safe_prefix=safe_prefix) 1396 1397 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1398 variables = self.expressions(expression, "this") 1399 default = self.sql(expression, "default") 1400 default = f" DEFAULT {default}" if default else "" 1401 kind = self.sql(expression, "kind") 1402 kind = f" {kind}" if kind else "" 1403 1404 return f"{variables}{kind}{default}"
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (
  SELECT 1 AS id, 2 AS my_id
)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated and new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
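For example, the following sketch illustrates the intended effect, assuming the BigQuery dialect keeps the original spelling in the node's metadata (the printed output is an expectation, not a guarantee):

    import sqlglot

    # JSON_VALUE and JSON_EXTRACT_SCALAR both parse into exp.JSONExtractScalar; with
    # the original name preserved, a BigQuery -> BigQuery roundtrip is expected to
    # emit the spelling that was parsed rather than a single canonical function name.
    print(sqlglot.transpile("SELECT JSON_VALUE(j, '$.a')", read="bigquery", write="bigquery")[0])
    print(sqlglot.transpile("SELECT JSON_EXTRACT_SCALAR(j, '$.a')", read="bigquery", write="bigquery")[0])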
Whether hex strings such as x'CC' evaluate to an integer or a binary/blob type.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL
must be explicitly specified.
508 def normalize_identifier(self, expression: E) -> E: 509 if ( 510 isinstance(expression, exp.Identifier) 511 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 512 ): 513 parent = expression.parent 514 while isinstance(parent, exp.Dot): 515 parent = parent.parent 516 517 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 518 # by default. The following check uses a heuristic to detect tables based on whether 519 # they are qualified. This should generally be correct, because tables in BigQuery 520 # must be qualified with at least a dataset, unless @@dataset_id is set. 521 case_sensitive = ( 522 isinstance(parent, exp.UserDefinedFunction) 523 or ( 524 isinstance(parent, exp.Table) 525 and parent.db 526 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 527 ) 528 or expression.meta.get("is_table") 529 ) 530 if not case_sensitive: 531 expression.set("this", expression.this.lower()) 532 533 return t.cast(E, expression) 534 535 return super().normalize_identifier(expression)
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, identifiers may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
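A minimal sketch of the BigQuery behavior implemented in normalize_identifier above; the printed result reflects the case-insensitive branch and is an expectation rather than a guarantee:

    from sqlglot import exp
    from sqlglot.dialects.bigquery import BigQuery

    bq = BigQuery()

    # An unqualified, unquoted identifier falls through the heuristic above
    # (it is neither a UDF nor a qualified table name), so it is lowercased.
    print(bq.normalize_identifier(exp.to_identifier("FoO")).name)  # foo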
Mapping of an escaped sequence (e.g. \n) to its unescaped version (the corresponding literal character).
537 class Tokenizer(tokens.Tokenizer): 538 QUOTES = ["'", '"', '"""', "'''"] 539 COMMENTS = ["--", "#", ("/*", "*/")] 540 IDENTIFIERS = ["`"] 541 STRING_ESCAPES = ["\\"] 542 543 HEX_STRINGS = [("0x", ""), ("0X", "")] 544 545 BYTE_STRINGS = [ 546 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 547 ] 548 549 RAW_STRINGS = [ 550 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 551 ] 552 553 NESTED_COMMENTS = False 554 555 KEYWORDS = { 556 **tokens.Tokenizer.KEYWORDS, 557 "ANY TYPE": TokenType.VARIANT, 558 "BEGIN": TokenType.COMMAND, 559 "BEGIN TRANSACTION": TokenType.BEGIN, 560 "BYTEINT": TokenType.INT, 561 "BYTES": TokenType.BINARY, 562 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 563 "DATETIME": TokenType.TIMESTAMP, 564 "DECLARE": TokenType.DECLARE, 565 "ELSEIF": TokenType.COMMAND, 566 "EXCEPTION": TokenType.COMMAND, 567 "EXPORT": TokenType.EXPORT, 568 "FLOAT64": TokenType.DOUBLE, 569 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 570 "MODEL": TokenType.MODEL, 571 "NOT DETERMINISTIC": TokenType.VOLATILE, 572 "RECORD": TokenType.STRUCT, 573 "TIMESTAMP": TokenType.TIMESTAMPTZ, 574 } 575 KEYWORDS.pop("DIV") 576 KEYWORDS.pop("VALUES") 577 KEYWORDS.pop("/*+")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
579 class Parser(parser.Parser): 580 PREFIXED_PIVOT_COLUMNS = True 581 LOG_DEFAULTS_TO_LN = True 582 SUPPORTS_IMPLICIT_UNNEST = True 583 JOINS_HAVE_EQUAL_PRECEDENCE = True 584 585 # BigQuery does not allow ASC/DESC to be used as an identifier 586 ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC} 587 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 588 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 589 COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - { 590 TokenType.ASC, 591 TokenType.DESC, 592 } 593 UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 594 595 FUNCTIONS = { 596 **parser.Parser.FUNCTIONS, 597 "CONTAINS_SUBSTR": _build_contains_substring, 598 "DATE": _build_date, 599 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 600 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 601 "DATE_TRUNC": lambda args: exp.DateTrunc( 602 unit=seq_get(args, 1), 603 this=seq_get(args, 0), 604 zone=seq_get(args, 2), 605 ), 606 "DATETIME": _build_datetime, 607 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 608 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 609 "DIV": binary_from_function(exp.IntDiv), 610 "EDIT_DISTANCE": _build_levenshtein, 611 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 612 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 613 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 614 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 615 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 616 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 617 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 618 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 619 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 620 "MD5": exp.MD5Digest.from_arg_list, 621 "TO_HEX": _build_to_hex, 622 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 623 [seq_get(args, 1), seq_get(args, 0)] 624 ), 625 "PARSE_TIMESTAMP": _build_parse_timestamp, 626 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 627 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 628 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 629 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 630 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 631 ), 632 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 633 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 634 "SPLIT": lambda args: exp.Split( 635 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 636 this=seq_get(args, 0), 637 expression=seq_get(args, 1) or exp.Literal.string(","), 638 ), 639 "STRPOS": exp.StrPosition.from_arg_list, 640 "TIME": _build_time, 641 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 642 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 643 "TIMESTAMP": _build_timestamp, 644 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 645 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 646 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 647 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 648 ), 649 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 650 this=seq_get(args, 0), 
scale=exp.UnixToTime.MILLIS 651 ), 652 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 653 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 654 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 655 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 656 } 657 658 FUNCTION_PARSERS = { 659 **parser.Parser.FUNCTION_PARSERS, 660 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 661 "JSON_ARRAY": lambda self: self.expression( 662 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 663 ), 664 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 665 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 666 } 667 FUNCTION_PARSERS.pop("TRIM") 668 669 NO_PAREN_FUNCTIONS = { 670 **parser.Parser.NO_PAREN_FUNCTIONS, 671 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 672 } 673 674 NESTED_TYPE_TOKENS = { 675 *parser.Parser.NESTED_TYPE_TOKENS, 676 TokenType.TABLE, 677 } 678 679 PROPERTY_PARSERS = { 680 **parser.Parser.PROPERTY_PARSERS, 681 "NOT DETERMINISTIC": lambda self: self.expression( 682 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 683 ), 684 "OPTIONS": lambda self: self._parse_with_property(), 685 } 686 687 CONSTRAINT_PARSERS = { 688 **parser.Parser.CONSTRAINT_PARSERS, 689 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 690 } 691 692 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 693 RANGE_PARSERS.pop(TokenType.OVERLAPS) 694 695 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 696 697 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 698 699 STATEMENT_PARSERS = { 700 **parser.Parser.STATEMENT_PARSERS, 701 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 702 TokenType.END: lambda self: self._parse_as_command(self._prev), 703 TokenType.FOR: lambda self: self._parse_for_in(), 704 TokenType.EXPORT: lambda self: self._parse_export_data(), 705 TokenType.DECLARE: lambda self: self._parse_declare(), 706 } 707 708 BRACKET_OFFSETS = { 709 "OFFSET": (0, False), 710 "ORDINAL": (1, False), 711 "SAFE_OFFSET": (0, True), 712 "SAFE_ORDINAL": (1, True), 713 } 714 715 def _parse_for_in(self) -> exp.ForIn: 716 this = self._parse_range() 717 self._match_text_seq("DO") 718 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 719 720 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 721 this = super()._parse_table_part(schema=schema) or self._parse_number() 722 723 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 724 if isinstance(this, exp.Identifier): 725 table_name = this.name 726 while self._match(TokenType.DASH, advance=False) and self._next: 727 start = self._curr 728 while self._is_connected() and not self._match_set( 729 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 730 ): 731 self._advance() 732 733 if start == self._curr: 734 break 735 736 table_name += self._find_sql(start, self._prev) 737 738 this = exp.Identifier( 739 this=table_name, quoted=this.args.get("quoted") 740 ).update_positions(this) 741 elif isinstance(this, exp.Literal): 742 table_name = this.name 743 744 if self._is_connected() and self._parse_var(any_token=True): 745 table_name += self._prev.text 746 747 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 748 749 return this 750 751 def _parse_table_parts( 752 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 753 ) -> 
exp.Table: 754 table = super()._parse_table_parts( 755 schema=schema, is_db_reference=is_db_reference, wildcard=True 756 ) 757 758 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 759 if not table.catalog: 760 if table.db: 761 previous_db = table.args["db"] 762 parts = table.db.split(".") 763 if len(parts) == 2 and not table.args["db"].quoted: 764 table.set( 765 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 766 ) 767 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 768 else: 769 previous_this = table.this 770 parts = table.name.split(".") 771 if len(parts) == 2 and not table.this.quoted: 772 table.set( 773 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 774 ) 775 table.set( 776 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 777 ) 778 779 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 780 alias = table.this 781 catalog, db, this, *rest = ( 782 exp.to_identifier(p, quoted=True) 783 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 784 ) 785 786 for part in (catalog, db, this): 787 if part: 788 part.update_positions(table.this) 789 790 if rest and this: 791 this = exp.Dot.build([this, *rest]) # type: ignore 792 793 table = exp.Table( 794 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 795 ) 796 table.meta["quoted_table"] = True 797 else: 798 alias = None 799 800 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 801 # dataset, so if the project identifier is omitted we need to fix the ast so that 802 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 803 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 804 # views, because it would seem like the "catalog" part is set, when it'd actually 805 # be the region/dataset. Merging the two identifiers into a single one is done to 806 # avoid producing a 4-part Table reference, which would cause issues in the schema 807 # module, when there are 3-part table names mixed with information schema views. 808 # 809 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 810 table_parts = table.parts 811 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 812 # We need to alias the table here to avoid breaking existing qualified columns. 813 # This is expected to be safe, because if there's an actual alias coming up in 814 # the token stream, it will overwrite this one. If there isn't one, we are only 815 # exposing the name that can be used to reference the view explicitly (a no-op). 816 exp.alias_( 817 table, 818 t.cast(exp.Identifier, alias or table_parts[-1]), 819 table=True, 820 copy=False, 821 ) 822 823 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 824 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 825 line=table_parts[-2].meta.get("line"), 826 col=table_parts[-1].meta.get("col"), 827 start=table_parts[-2].meta.get("start"), 828 end=table_parts[-1].meta.get("end"), 829 ) 830 table.set("this", new_this) 831 table.set("db", seq_get(table_parts, -3)) 832 table.set("catalog", seq_get(table_parts, -4)) 833 834 return table 835 836 def _parse_column(self) -> t.Optional[exp.Expression]: 837 column = super()._parse_column() 838 if isinstance(column, exp.Column): 839 parts = column.parts 840 if any("." 
in p.name for p in parts): 841 catalog, db, table, this, *rest = ( 842 exp.to_identifier(p, quoted=True) 843 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 844 ) 845 846 if rest and this: 847 this = exp.Dot.build([this, *rest]) # type: ignore 848 849 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 850 column.meta["quoted_column"] = True 851 852 return column 853 854 @t.overload 855 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 856 857 @t.overload 858 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 859 860 def _parse_json_object(self, agg=False): 861 json_object = super()._parse_json_object() 862 array_kv_pair = seq_get(json_object.expressions, 0) 863 864 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 865 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 866 if ( 867 array_kv_pair 868 and isinstance(array_kv_pair.this, exp.Array) 869 and isinstance(array_kv_pair.expression, exp.Array) 870 ): 871 keys = array_kv_pair.this.expressions 872 values = array_kv_pair.expression.expressions 873 874 json_object.set( 875 "expressions", 876 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 877 ) 878 879 return json_object 880 881 def _parse_bracket( 882 self, this: t.Optional[exp.Expression] = None 883 ) -> t.Optional[exp.Expression]: 884 bracket = super()._parse_bracket(this) 885 886 if this is bracket: 887 return bracket 888 889 if isinstance(bracket, exp.Bracket): 890 for expression in bracket.expressions: 891 name = expression.name.upper() 892 893 if name not in self.BRACKET_OFFSETS: 894 break 895 896 offset, safe = self.BRACKET_OFFSETS[name] 897 bracket.set("offset", offset) 898 bracket.set("safe", safe) 899 expression.replace(expression.expressions[0]) 900 901 return bracket 902 903 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 904 unnest = super()._parse_unnest(with_alias=with_alias) 905 906 if not unnest: 907 return None 908 909 unnest_expr = seq_get(unnest.expressions, 0) 910 if unnest_expr: 911 from sqlglot.optimizer.annotate_types import annotate_types 912 913 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 914 915 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 916 # in contrast to other dialects such as DuckDB which flattens only the array by default 917 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 918 array_elem.is_type(exp.DataType.Type.STRUCT) 919 for array_elem in unnest_expr._type.expressions 920 ): 921 unnest.set("explode_array", True) 922 923 return unnest 924 925 def _parse_make_interval(self) -> exp.MakeInterval: 926 expr = exp.MakeInterval() 927 928 for arg_key in expr.arg_types: 929 value = self._parse_lambda() 930 931 if not value: 932 break 933 934 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 935 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 936 if isinstance(value, exp.Kwarg): 937 arg_key = value.this.name 938 939 expr.set(arg_key, value) 940 941 self._match(TokenType.COMMA) 942 943 return expr 944 945 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 946 expr = self.expression( 947 exp.FeaturesAtTime, 948 this=(self._match(TokenType.TABLE) and self._parse_table()) 949 or self._parse_select(nested=True), 950 ) 951 952 while self._match(TokenType.COMMA): 953 arg = self._parse_lambda() 954 955 # Get the LHS of 
the Kwarg and set the arg to that value, e.g 956 # "num_rows => 1" sets the expr's `num_rows` arg 957 if arg: 958 expr.set(arg.this.name, arg) 959 960 return expr 961 962 def _parse_export_data(self) -> exp.Export: 963 self._match_text_seq("DATA") 964 965 return self.expression( 966 exp.Export, 967 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 968 options=self._parse_properties(), 969 this=self._match_text_seq("AS") and self._parse_select(), 970 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
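A minimal usage sketch; parser options such as error_level are forwarded to this Parser through the top-level API:

    import sqlglot
    from sqlglot.errors import ErrorLevel

    # Parse a BigQuery query into a syntax tree, raising on any parse error.
    ast = sqlglot.parse_one(
        "SELECT TIMESTAMP_MILLIS(1000) AS ts",
        read="bigquery",
        error_level=ErrorLevel.RAISE,
    )
    print(type(ast).__name__)  # Select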
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
972 class Generator(generator.Generator): 973 INTERVAL_ALLOWS_PLURAL_FORM = False 974 JOIN_HINTS = False 975 QUERY_HINTS = False 976 TABLE_HINTS = False 977 LIMIT_FETCH = "LIMIT" 978 RENAME_TABLE_WITH_DB = False 979 NVL2_SUPPORTED = False 980 UNNEST_WITH_ORDINALITY = False 981 COLLATE_IS_FUNC = True 982 LIMIT_ONLY_LITERALS = True 983 SUPPORTS_TABLE_ALIAS_COLUMNS = False 984 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 985 JSON_KEY_VALUE_PAIR_SEP = "," 986 NULL_ORDERING_SUPPORTED = False 987 IGNORE_NULLS_IN_FUNC = True 988 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 989 CAN_IMPLEMENT_ARRAY_ANY = True 990 SUPPORTS_TO_NUMBER = False 991 NAMED_PLACEHOLDER_TOKEN = "@" 992 HEX_FUNC = "TO_HEX" 993 WITH_PROPERTIES_PREFIX = "OPTIONS" 994 SUPPORTS_EXPLODING_PROJECTIONS = False 995 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 996 SUPPORTS_UNIX_SECONDS = True 997 998 TRANSFORMS = { 999 **generator.Generator.TRANSFORMS, 1000 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1001 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1002 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1003 exp.Array: inline_array_unless_query, 1004 exp.ArrayContains: _array_contains_sql, 1005 exp.ArrayFilter: filter_array_using_unnest, 1006 exp.ArrayRemove: filter_array_using_unnest, 1007 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1008 exp.CollateProperty: lambda self, e: ( 1009 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1010 if e.args.get("default") 1011 else f"COLLATE {self.sql(e, 'this')}" 1012 ), 1013 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1014 exp.CountIf: rename_func("COUNTIF"), 1015 exp.Create: _create_sql, 1016 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1017 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1018 exp.DateDiff: lambda self, e: self.func( 1019 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1020 ), 1021 exp.DateFromParts: rename_func("DATE"), 1022 exp.DateStrToDate: datestrtodate_sql, 1023 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1024 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1025 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1026 exp.FromTimeZone: lambda self, e: self.func( 1027 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1028 ), 1029 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1030 exp.GroupConcat: lambda self, e: groupconcat_sql( 1031 self, e, func_name="STRING_AGG", within_group=False 1032 ), 1033 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1034 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1035 exp.If: if_sql(false_value="NULL"), 1036 exp.ILike: no_ilike_sql, 1037 exp.IntDiv: rename_func("DIV"), 1038 exp.Int64: rename_func("INT64"), 1039 exp.JSONExtract: _json_extract_sql, 1040 exp.JSONExtractArray: _json_extract_sql, 1041 exp.JSONExtractScalar: _json_extract_sql, 1042 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1043 exp.Levenshtein: _levenshtein_sql, 1044 exp.Max: max_or_greatest, 1045 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1046 exp.MD5Digest: rename_func("MD5"), 1047 exp.Min: min_or_least, 1048 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1049 exp.RegexpExtract: lambda self, e: self.func( 1050 "REGEXP_EXTRACT", 1051 e.this, 1052 e.expression, 1053 e.args.get("position"), 1054 e.args.get("occurrence"), 1055 ), 1056 exp.RegexpExtractAll: lambda self, e: self.func( 1057 "REGEXP_EXTRACT_ALL", e.this, e.expression 
1058 ), 1059 exp.RegexpReplace: regexp_replace_sql, 1060 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1061 exp.ReturnsProperty: _returnsproperty_sql, 1062 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1063 exp.Select: transforms.preprocess( 1064 [ 1065 transforms.explode_projection_to_unnest(), 1066 transforms.unqualify_unnest, 1067 transforms.eliminate_distinct_on, 1068 _alias_ordered_group, 1069 transforms.eliminate_semi_and_anti_joins, 1070 ] 1071 ), 1072 exp.SHA: rename_func("SHA1"), 1073 exp.SHA2: sha256_sql, 1074 exp.Space: space_sql, 1075 exp.StabilityProperty: lambda self, e: ( 1076 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1077 ), 1078 exp.String: rename_func("STRING"), 1079 exp.StrPosition: lambda self, e: ( 1080 strposition_sql( 1081 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1082 ) 1083 ), 1084 exp.StrToDate: _str_to_datetime_sql, 1085 exp.StrToTime: _str_to_datetime_sql, 1086 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1087 exp.TimeFromParts: rename_func("TIME"), 1088 exp.TimestampFromParts: rename_func("DATETIME"), 1089 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1090 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1091 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1092 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1093 exp.TimeStrToTime: timestrtotime_sql, 1094 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1095 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1096 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1097 exp.TsOrDsToTime: rename_func("TIME"), 1098 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1099 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1100 exp.Unhex: rename_func("FROM_HEX"), 1101 exp.UnixDate: rename_func("UNIX_DATE"), 1102 exp.UnixToTime: _unix_to_time_sql, 1103 exp.Uuid: lambda *_: "GENERATE_UUID()", 1104 exp.Values: _derived_table_values_to_unnest, 1105 exp.VariancePop: rename_func("VAR_POP"), 1106 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1107 } 1108 1109 SUPPORTED_JSON_PATH_PARTS = { 1110 exp.JSONPathKey, 1111 exp.JSONPathRoot, 1112 exp.JSONPathSubscript, 1113 } 1114 1115 TYPE_MAPPING = { 1116 **generator.Generator.TYPE_MAPPING, 1117 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1118 exp.DataType.Type.BIGINT: "INT64", 1119 exp.DataType.Type.BINARY: "BYTES", 1120 exp.DataType.Type.BLOB: "BYTES", 1121 exp.DataType.Type.BOOLEAN: "BOOL", 1122 exp.DataType.Type.CHAR: "STRING", 1123 exp.DataType.Type.DECIMAL: "NUMERIC", 1124 exp.DataType.Type.DOUBLE: "FLOAT64", 1125 exp.DataType.Type.FLOAT: "FLOAT64", 1126 exp.DataType.Type.INT: "INT64", 1127 exp.DataType.Type.NCHAR: "STRING", 1128 exp.DataType.Type.NVARCHAR: "STRING", 1129 exp.DataType.Type.SMALLINT: "INT64", 1130 exp.DataType.Type.TEXT: "STRING", 1131 exp.DataType.Type.TIMESTAMP: "DATETIME", 1132 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1133 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1134 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1135 exp.DataType.Type.TINYINT: "INT64", 1136 exp.DataType.Type.ROWVERSION: "BYTES", 1137 exp.DataType.Type.UUID: "STRING", 1138 exp.DataType.Type.VARBINARY: "BYTES", 1139 exp.DataType.Type.VARCHAR: "STRING", 1140 exp.DataType.Type.VARIANT: "ANY TYPE", 1141 } 1142 1143 PROPERTIES_LOCATION = { 1144 **generator.Generator.PROPERTIES_LOCATION, 1145 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1146 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1147 } 1148 1149 # WINDOW comes after QUALIFY 1150 # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1151 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1152 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1153 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1154 } 1155 1156 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1157 RESERVED_KEYWORDS = { 1158 "all", 1159 "and", 1160 "any", 1161 "array", 1162 "as", 1163 "asc", 1164 "assert_rows_modified", 1165 "at", 1166 "between", 1167 "by", 1168 "case", 1169 "cast", 1170 "collate", 1171 "contains", 1172 "create", 1173 "cross", 1174 "cube", 1175 "current", 1176 "default", 1177 "define", 1178 "desc", 1179 "distinct", 1180 "else", 1181 "end", 1182 "enum", 1183 "escape", 1184 "except", 1185 "exclude", 1186 "exists", 1187 "extract", 1188 "false", 1189 "fetch", 1190 "following", 1191 "for", 1192 "from", 1193 "full", 1194 "group", 1195 "grouping", 1196 "groups", 1197 "hash", 1198 "having", 1199 "if", 1200 "ignore", 1201 "in", 1202 "inner", 1203 "intersect", 1204 "interval", 1205 "into", 1206 "is", 1207 "join", 1208 "lateral", 1209 "left", 1210 "like", 1211 "limit", 1212 "lookup", 1213 "merge", 1214 "natural", 1215 "new", 1216 "no", 1217 "not", 1218 "null", 1219 "nulls", 1220 "of", 1221 "on", 1222 "or", 1223 "order", 1224 "outer", 1225 "over", 1226 "partition", 1227 "preceding", 1228 "proto", 1229 "qualify", 1230 "range", 1231 "recursive", 1232 "respect", 1233 "right", 1234 "rollup", 1235 "rows", 1236 "select", 1237 "set", 1238 "some", 1239 "struct", 1240 "tablesample", 1241 "then", 1242 "to", 1243 "treat", 1244 "true", 1245 "unbounded", 1246 "union", 1247 "unnest", 1248 "using", 1249 "when", 1250 "where", 1251 "window", 1252 "with", 1253 "within", 1254 } 1255 1256 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1257 unit = expression.unit 1258 unit_sql = unit.name if unit.is_string else self.sql(unit) 1259 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1260 1261 def mod_sql(self, expression: exp.Mod) -> str: 1262 this = expression.this 1263 expr = expression.expression 1264 return self.func( 1265 "MOD", 1266 this.unnest() if isinstance(this, exp.Paren) else this, 1267 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1268 ) 1269 1270 def column_parts(self, expression: exp.Column) -> str: 1271 if expression.meta.get("quoted_column"): 1272 # If a column reference is of the form `dataset.table`.name, we need 1273 # to preserve the quoted table path, otherwise the reference breaks 1274 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1275 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1276 return f"{table_path}.{self.sql(expression, 'this')}" 1277 1278 return super().column_parts(expression) 1279 1280 def table_parts(self, expression: exp.Table) -> str: 1281 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1282 # we need to make sure the correct quoting is used in each case. 
1283 # 1284 # For example, if there is a CTE x that clashes with a schema name, then the former will 1285 # return the table y in that schema, whereas the latter will return the CTE's y column: 1286 # 1287 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1288 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1289 if expression.meta.get("quoted_table"): 1290 table_parts = ".".join(p.name for p in expression.parts) 1291 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1292 1293 return super().table_parts(expression) 1294 1295 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1296 this = expression.this 1297 if isinstance(this, exp.TsOrDsToDatetime): 1298 func_name = "FORMAT_DATETIME" 1299 elif isinstance(this, exp.TsOrDsToTimestamp): 1300 func_name = "FORMAT_TIMESTAMP" 1301 else: 1302 func_name = "FORMAT_DATE" 1303 1304 time_expr = ( 1305 this 1306 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1307 else expression 1308 ) 1309 return self.func( 1310 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1311 ) 1312 1313 def eq_sql(self, expression: exp.EQ) -> str: 1314 # Operands of = cannot be NULL in BigQuery 1315 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1316 if not isinstance(expression.parent, exp.Update): 1317 return "NULL" 1318 1319 return self.binary(expression, "=") 1320 1321 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1322 parent = expression.parent 1323 1324 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1325 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1326 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1327 return self.func( 1328 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1329 ) 1330 1331 return super().attimezone_sql(expression) 1332 1333 def trycast_sql(self, expression: exp.TryCast) -> str: 1334 return self.cast_sql(expression, safe_prefix="SAFE_") 1335 1336 def bracket_sql(self, expression: exp.Bracket) -> str: 1337 this = expression.this 1338 expressions = expression.expressions 1339 1340 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1341 arg = expressions[0] 1342 if arg.type is None: 1343 from sqlglot.optimizer.annotate_types import annotate_types 1344 1345 arg = annotate_types(arg, dialect=self.dialect) 1346 1347 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1348 # BQ doesn't support bracket syntax with string values for structs 1349 return f"{self.sql(this)}.{arg.name}" 1350 1351 expressions_sql = self.expressions(expression, flat=True) 1352 offset = expression.args.get("offset") 1353 1354 if offset == 0: 1355 expressions_sql = f"OFFSET({expressions_sql})" 1356 elif offset == 1: 1357 expressions_sql = f"ORDINAL({expressions_sql})" 1358 elif offset is not None: 1359 self.unsupported(f"Unsupported array offset: {offset}") 1360 1361 if expression.args.get("safe"): 1362 expressions_sql = f"SAFE_{expressions_sql}" 1363 1364 return f"{self.sql(this)}[{expressions_sql}]" 1365 1366 def in_unnest_op(self, expression: exp.Unnest) -> str: 1367 return self.sql(expression) 1368 1369 def version_sql(self, expression: exp.Version) -> str: 1370 if expression.name == "TIMESTAMP": 1371 expression.set("this", "SYSTEM_TIME") 1372 return super().version_sql(expression) 1373 1374 def contains_sql(self, 
expression: exp.Contains) -> str: 1375 this = expression.this 1376 expr = expression.expression 1377 1378 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1379 this = this.this 1380 expr = expr.this 1381 1382 return self.func("CONTAINS_SUBSTR", this, expr) 1383 1384 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1385 this = expression.this 1386 1387 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1388 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1389 # because they aren't literals and so the above syntax is invalid BigQuery. 1390 if isinstance(this, exp.Array): 1391 elem = seq_get(this.expressions, 0) 1392 if not (elem and elem.find(exp.Query)): 1393 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1394 1395 return super().cast_sql(expression, safe_prefix=safe_prefix) 1396 1397 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1398 variables = self.expressions(expression, "this") 1399 default = self.sql(expression, "default") 1400 default = f" DEFAULT {default}" if default else "" 1401 kind = self.sql(expression, "kind") 1402 kind = f" {kind}" if kind else "" 1403 1404 return f"{variables}{kind}{default}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
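To make these options concrete, here is a minimal sketch that drives the BigQuery generator through sqlglot.transpile, which forwards generator options such as pretty and identify. The query, table, and column names are invented for illustration; only a couple of the options listed above are exercised.

    import sqlglot

    # Hypothetical query; any valid BigQuery SQL would do here.
    sql = "select col_a, col_b from my_dataset.my_table where col_a > 10"

    # Generator options like `pretty` and `identify` can be passed straight
    # through transpile(), which hands them to the BigQuery Generator.
    out = sqlglot.transpile(
        sql,
        read="bigquery",
        write="bigquery",
        pretty=True,    # format the produced SQL string
        identify=True,  # quote every identifier (backticks in BigQuery)
    )[0]
    print(out)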
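The dialect-specific overrides in the source above are easiest to see end to end. The following sketch runs a few statements through the BigQuery generator and prints the results; the table and column names are made up, and the comments describe the expected effect based on the TYPE_MAPPING, trycast_sql, and eq_sql definitions shown earlier, rather than asserting exact output.

    import sqlglot

    examples = [
        # TYPE_MAPPING: DOUBLE should come out as FLOAT64, VARCHAR as STRING.
        "SELECT CAST(a AS DOUBLE), CAST(b AS VARCHAR) FROM t",
        # trycast_sql: TRY_CAST should be emitted with the SAFE_ prefix, i.e. SAFE_CAST.
        "SELECT TRY_CAST(a AS INT) FROM t",
        # eq_sql: comparing with '= NULL' should collapse to NULL, since the
        # operands of '=' cannot be NULL in BigQuery.
        "SELECT a = NULL FROM t",
    ]

    for sql in examples:
        print(sqlglot.transpile(sql, write="bigquery")[0])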
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql