sqlglot.dialects.clickhouse
1from __future__ import annotations 2 3import typing as t 4import datetime 5 6from sqlglot import exp, generator, parser, tokens 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 NormalizationStrategy, 10 arg_max_or_min_no_count, 11 build_date_delta, 12 build_formatted_time, 13 inline_array_sql, 14 json_extract_segments, 15 json_path_key_only_name, 16 no_pivot_sql, 17 build_json_extract_path, 18 rename_func, 19 sha256_sql, 20 var_map_sql, 21 timestamptrunc_sql, 22 unit_to_var, 23 trim_sql, 24) 25from sqlglot.generator import Generator 26from sqlglot.helper import is_int, seq_get 27from sqlglot.tokens import Token, TokenType 28 29DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 30 31 32def _build_date_format(args: t.List) -> exp.TimeToStr: 33 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 34 35 timezone = seq_get(args, 2) 36 if timezone: 37 expr.set("zone", timezone) 38 39 return expr 40 41 42def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 43 scale = expression.args.get("scale") 44 timestamp = expression.this 45 46 if scale in (None, exp.UnixToTime.SECONDS): 47 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 48 if scale == exp.UnixToTime.MILLIS: 49 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 50 if scale == exp.UnixToTime.MICROS: 51 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 52 if scale == exp.UnixToTime.NANOS: 53 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 54 55 return self.func( 56 "fromUnixTimestamp", 57 exp.cast( 58 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 59 ), 60 ) 61 62 63def _lower_func(sql: str) -> str: 64 index = sql.index("(") 65 return sql[:index].lower() + sql[index:] 66 67 68def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 69 quantile = expression.args["quantile"] 70 args = f"({self.sql(expression, 'this')})" 71 72 if isinstance(quantile, exp.Array): 73 func = self.func("quantiles", *quantile) 74 else: 75 func = self.func("quantile", quantile) 76 77 return func + args 78 79 80def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 81 if len(args) == 1: 82 return exp.CountIf(this=seq_get(args, 0)) 83 84 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 85 86 87def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 88 if len(args) == 3: 89 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 90 91 strtodate = exp.StrToDate.from_arg_list(args) 92 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 93 94 95def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 96 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 97 if not expression.unit: 98 return rename_func(name)(self, expression) 99 100 return self.func( 101 name, 102 unit_to_var(expression), 103 expression.expression, 104 expression.this, 105 ) 106 107 return _delta_sql 108 109 110def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 111 tz = expression.args.get("zone") 112 datatype = exp.DataType.build(exp.DataType.Type.TIMESTAMP) 113 ts = expression.this 114 if tz: 115 # build a datatype that encodes the timezone as a type parameter, eg DateTime('America/Los_Angeles') 116 datatype = exp.DataType.build( 117 exp.DataType.Type.TIMESTAMPTZ, # Type.TIMESTAMPTZ maps to DateTime 118 expressions=[exp.DataTypeParam(this=tz)], 119 ) 120 121 if isinstance(ts, exp.Literal): 122 # strip the timezone out of the literal, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 123 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if it's part of the timestamp string 124 ts_without_tz = ( 125 datetime.datetime.fromisoformat(ts.name).replace(tzinfo=None).isoformat(sep=" ") 126 ) 127 ts = exp.Literal.string(ts_without_tz) 128 129 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 130 131 132class ClickHouse(Dialect): 133 NORMALIZE_FUNCTIONS: bool | str = False 134 NULL_ORDERING = "nulls_are_last" 135 SUPPORTS_USER_DEFINED_TYPES = False 136 SAFE_DIVISION = True 137 LOG_BASE_FIRST: t.Optional[bool] = None 138 FORCE_EARLY_ALIAS_REF_EXPANSION = True 139 140 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 141 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 142 143 UNESCAPED_SEQUENCES = { 144 "\\0": "\0", 145 } 146 147 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 148 149 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 150 exp.Except: False, 151 exp.Intersect: False, 152 exp.Union: None, 153 } 154 155 class Tokenizer(tokens.Tokenizer): 156 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 157 IDENTIFIERS = ['"', "`"] 158 STRING_ESCAPES = ["'", "\\"] 159 BIT_STRINGS = [("0b", "")] 160 HEX_STRINGS = [("0x", ""), ("0X", "")] 161 HEREDOC_STRINGS = ["$"] 162 163 KEYWORDS = { 164 **tokens.Tokenizer.KEYWORDS, 165 "ATTACH": TokenType.COMMAND, 166 "DATE32": TokenType.DATE32, 167 "DATETIME64": TokenType.DATETIME64, 168 "DICTIONARY": TokenType.DICTIONARY, 169 "ENUM8": TokenType.ENUM8, 170 "ENUM16": TokenType.ENUM16, 171 "FINAL": TokenType.FINAL, 172 "FIXEDSTRING": TokenType.FIXEDSTRING, 173 "FLOAT32": TokenType.FLOAT, 174 "FLOAT64": TokenType.DOUBLE, 175 "GLOBAL": TokenType.GLOBAL, 176 "INT256": TokenType.INT256, 177 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 178 "MAP": TokenType.MAP, 179 "NESTED": TokenType.NESTED, 180 "SAMPLE": TokenType.TABLE_SAMPLE, 181 "TUPLE": TokenType.STRUCT, 182 "UINT128": TokenType.UINT128, 183 "UINT16": TokenType.USMALLINT, 184 "UINT256": TokenType.UINT256, 185 "UINT32": TokenType.UINT, 186 "UINT64": TokenType.UBIGINT, 187 "UINT8": TokenType.UTINYINT, 188 "IPV4": TokenType.IPV4, 189 "IPV6": TokenType.IPV6, 190 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 191 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 192 "SYSTEM": TokenType.COMMAND, 193 "PREWHERE": TokenType.PREWHERE, 194 } 195 KEYWORDS.pop("/*+") 196 197 SINGLE_TOKENS = { 198 **tokens.Tokenizer.SINGLE_TOKENS, 199 "$": TokenType.HEREDOC_STRING, 200 } 201 202 class Parser(parser.Parser): 203 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 204 # * select x from t1 union all select x from t2 limit 1; 205 # * select x from t1 union all (select x from t2 limit 1); 206 MODIFIERS_ATTACHED_TO_SET_OP = False 207 INTERVAL_SPANS = False 208 209 FUNCTIONS = { 210 **parser.Parser.FUNCTIONS, 211 "ANY": exp.AnyValue.from_arg_list, 212 "ARRAYSUM": exp.ArraySum.from_arg_list, 213 "COUNTIF": _build_count_if, 214 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 215 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 217 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATE_FORMAT": _build_date_format, 219 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 220 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 221 "FORMATDATETIME": _build_date_format, 222 "JSONEXTRACTSTRING": build_json_extract_path( 223 exp.JSONExtractScalar, zero_based_indexing=False 224 ), 225 "MAP": parser.build_var_map, 226 "MATCH": exp.RegexpLike.from_arg_list, 227 "RANDCANONICAL": exp.Rand.from_arg_list, 228 "STR_TO_DATE": _build_str_to_date, 229 "TUPLE": exp.Struct.from_arg_list, 230 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 231 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 233 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "UNIQ": exp.ApproxDistinct.from_arg_list, 235 "XOR": lambda args: exp.Xor(expressions=args), 236 "MD5": exp.MD5Digest.from_arg_list, 237 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 238 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 239 } 240 241 AGG_FUNCTIONS = { 242 "count", 243 "min", 244 "max", 245 "sum", 246 "avg", 247 "any", 248 "stddevPop", 249 "stddevSamp", 250 "varPop", 251 "varSamp", 252 "corr", 253 "covarPop", 254 "covarSamp", 255 "entropy", 256 "exponentialMovingAverage", 257 "intervalLengthSum", 258 "kolmogorovSmirnovTest", 259 "mannWhitneyUTest", 260 "median", 261 "rankCorr", 262 "sumKahan", 263 "studentTTest", 264 "welchTTest", 265 "anyHeavy", 266 "anyLast", 267 "boundingRatio", 268 "first_value", 269 "last_value", 270 "argMin", 271 "argMax", 272 "avgWeighted", 273 "topK", 274 "topKWeighted", 275 "deltaSum", 276 "deltaSumTimestamp", 277 "groupArray", 278 "groupArrayLast", 279 "groupUniqArray", 280 "groupArrayInsertAt", 281 "groupArrayMovingAvg", 282 "groupArrayMovingSum", 283 "groupArraySample", 284 "groupBitAnd", 285 "groupBitOr", 286 "groupBitXor", 287 "groupBitmap", 288 "groupBitmapAnd", 289 "groupBitmapOr", 290 "groupBitmapXor", 291 "sumWithOverflow", 292 "sumMap", 293 "minMap", 294 "maxMap", 295 "skewSamp", 296 "skewPop", 297 "kurtSamp", 298 "kurtPop", 299 "uniq", 300 "uniqExact", 301 "uniqCombined", 302 "uniqCombined64", 303 "uniqHLL12", 304 "uniqTheta", 305 "quantile", 306 "quantiles", 307 "quantileExact", 308 "quantilesExact", 309 "quantileExactLow", 310 "quantilesExactLow", 311 "quantileExactHigh", 312 "quantilesExactHigh", 313 "quantileExactWeighted", 314 "quantilesExactWeighted", 315 "quantileTiming", 316 "quantilesTiming", 317 "quantileTimingWeighted", 318 "quantilesTimingWeighted", 319 "quantileDeterministic", 320 "quantilesDeterministic", 321 "quantileTDigest", 322 "quantilesTDigest", 323 "quantileTDigestWeighted", 324 "quantilesTDigestWeighted", 325 "quantileBFloat16", 326 "quantilesBFloat16", 327 "quantileBFloat16Weighted", 328 "quantilesBFloat16Weighted", 329 "simpleLinearRegression", 330 "stochasticLinearRegression", 331 "stochasticLogisticRegression", 332 "categoricalInformationValue", 333 "contingency", 334 "cramersV", 335 "cramersVBiasCorrected", 336 "theilsU", 337 "maxIntersections", 338 "maxIntersectionsPosition", 339 "meanZTest", 340 "quantileInterpolatedWeighted", 341 "quantilesInterpolatedWeighted", 342 "quantileGK", 343 "quantilesGK", 344 "sparkBar", 345 "sumCount", 346 "largestTriangleThreeBuckets", 347 "histogram", 348 "sequenceMatch", 349 "sequenceCount", 350 "windowFunnel", 351 "retention", 352 "uniqUpTo", 353 "sequenceNextNode", 354 "exponentialTimeDecayedAvg", 355 } 356 357 AGG_FUNCTIONS_SUFFIXES = [ 358 "If", 359 "Array", 360 "ArrayIf", 361 "Map", 362 "SimpleState", 363 "State", 364 "Merge", 365 "MergeState", 366 "ForEach", 367 "Distinct", 368 "OrDefault", 369 "OrNull", 370 "Resample", 371 "ArgMin", 372 "ArgMax", 373 ] 374 375 FUNC_TOKENS = { 376 *parser.Parser.FUNC_TOKENS, 377 TokenType.SET, 378 } 379 380 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 381 382 ID_VAR_TOKENS = { 383 *parser.Parser.ID_VAR_TOKENS, 384 TokenType.LIKE, 385 } 386 387 AGG_FUNC_MAPPING = ( 388 lambda functions, suffixes: { 389 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 390 } 391 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 392 393 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 394 395 FUNCTION_PARSERS = { 396 **parser.Parser.FUNCTION_PARSERS, 397 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 398 "QUANTILE": lambda self: self._parse_quantile(), 399 "COLUMNS": lambda self: self._parse_columns(), 400 } 401 402 FUNCTION_PARSERS.pop("MATCH") 403 404 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 405 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 406 407 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 408 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 409 410 RANGE_PARSERS = { 411 **parser.Parser.RANGE_PARSERS, 412 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 413 and self._parse_in(this, is_global=True), 414 } 415 416 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 417 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 418 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 419 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 420 421 JOIN_KINDS = { 422 *parser.Parser.JOIN_KINDS, 423 TokenType.ANY, 424 TokenType.ASOF, 425 TokenType.ARRAY, 426 } 427 428 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 429 TokenType.ANY, 430 TokenType.ARRAY, 431 TokenType.FINAL, 432 TokenType.FORMAT, 433 TokenType.SETTINGS, 434 } 435 436 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 437 TokenType.FORMAT, 438 } 439 440 LOG_DEFAULTS_TO_LN = True 441 442 QUERY_MODIFIER_PARSERS = { 443 **parser.Parser.QUERY_MODIFIER_PARSERS, 444 TokenType.SETTINGS: lambda self: ( 445 "settings", 446 self._advance() or self._parse_csv(self._parse_assignment), 447 ), 448 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 449 } 450 451 CONSTRAINT_PARSERS = { 452 **parser.Parser.CONSTRAINT_PARSERS, 453 "INDEX": lambda self: self._parse_index_constraint(), 454 "CODEC": lambda self: self._parse_compress(), 455 } 456 457 ALTER_PARSERS = { 458 **parser.Parser.ALTER_PARSERS, 459 "REPLACE": lambda self: self._parse_alter_table_replace(), 460 } 461 462 SCHEMA_UNNAMED_CONSTRAINTS = { 463 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 464 "INDEX", 465 } 466 467 PLACEHOLDER_PARSERS = { 468 **parser.Parser.PLACEHOLDER_PARSERS, 469 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 470 } 471 472 def _parse_types( 473 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 474 ) -> t.Optional[exp.Expression]: 475 dtype = super()._parse_types( 476 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 477 ) 478 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 479 # Mark every type as non-nullable which is ClickHouse's default, unless it's 480 # already marked as nullable. This marker helps us transpile types from other 481 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 482 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 483 # fail in ClickHouse without the `Nullable` type constructor. 484 dtype.set("nullable", False) 485 486 return dtype 487 488 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 489 index = self._index 490 this = self._parse_bitwise() 491 if self._match(TokenType.FROM): 492 self._retreat(index) 493 return super()._parse_extract() 494 495 # We return Anonymous here because extract and regexpExtract have different semantics, 496 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 497 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 498 # 499 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 500 self._match(TokenType.COMMA) 501 return self.expression( 502 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 503 ) 504 505 def _parse_assignment(self) -> t.Optional[exp.Expression]: 506 this = super()._parse_assignment() 507 508 if self._match(TokenType.PLACEHOLDER): 509 return self.expression( 510 exp.If, 511 this=this, 512 true=self._parse_assignment(), 513 false=self._match(TokenType.COLON) and self._parse_assignment(), 514 ) 515 516 return this 517 518 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 519 """ 520 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 521 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 522 """ 523 this = self._parse_id_var() 524 self._match(TokenType.COLON) 525 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 526 self._match_text_seq("IDENTIFIER") and "Identifier" 527 ) 528 529 if not kind: 530 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 531 elif not self._match(TokenType.R_BRACE): 532 self.raise_error("Expecting }") 533 534 return self.expression(exp.Placeholder, this=this, kind=kind) 535 536 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 537 this = super()._parse_in(this) 538 this.set("is_global", is_global) 539 return this 540 541 def _parse_table( 542 self, 543 schema: bool = False, 544 joins: bool = False, 545 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 546 parse_bracket: bool = False, 547 is_db_reference: bool = False, 548 parse_partition: bool = False, 549 ) -> t.Optional[exp.Expression]: 550 this = super()._parse_table( 551 schema=schema, 552 joins=joins, 553 alias_tokens=alias_tokens, 554 parse_bracket=parse_bracket, 555 is_db_reference=is_db_reference, 556 ) 557 558 if self._match(TokenType.FINAL): 559 this = self.expression(exp.Final, this=this) 560 561 return this 562 563 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 564 return super()._parse_position(haystack_first=True) 565 566 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 567 def _parse_cte(self) -> exp.CTE: 568 # WITH <identifier> AS <subquery expression> 569 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 570 571 if not cte: 572 # WITH <expression> AS <identifier> 573 cte = self.expression( 574 exp.CTE, 575 this=self._parse_assignment(), 576 alias=self._parse_table_alias(), 577 scalar=True, 578 ) 579 580 return cte 581 582 def _parse_join_parts( 583 self, 584 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 585 is_global = self._match(TokenType.GLOBAL) and self._prev 586 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 587 588 if kind_pre: 589 kind = self._match_set(self.JOIN_KINDS) and self._prev 590 side = self._match_set(self.JOIN_SIDES) and self._prev 591 return is_global, side, kind 592 593 return ( 594 is_global, 595 self._match_set(self.JOIN_SIDES) and self._prev, 596 self._match_set(self.JOIN_KINDS) and self._prev, 597 ) 598 599 def _parse_join( 600 self, skip_join_token: bool = False, parse_bracket: bool = False 601 ) -> t.Optional[exp.Join]: 602 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 603 if join: 604 join.set("global", join.args.pop("method", None)) 605 606 return join 607 608 def _parse_function( 609 self, 610 functions: t.Optional[t.Dict[str, t.Callable]] = None, 611 anonymous: bool = False, 612 optional_parens: bool = True, 613 any_token: bool = False, 614 ) -> t.Optional[exp.Expression]: 615 expr = super()._parse_function( 616 functions=functions, 617 anonymous=anonymous, 618 optional_parens=optional_parens, 619 any_token=any_token, 620 ) 621 622 func = expr.this if isinstance(expr, exp.Window) else expr 623 624 # Aggregate functions can be split in 2 parts: <func_name><suffix> 625 parts = ( 626 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 627 ) 628 629 if parts: 630 params = self._parse_func_params(func) 631 632 kwargs = { 633 "this": func.this, 634 "expressions": func.expressions, 635 } 636 if parts[1]: 637 kwargs["parts"] = parts 638 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 639 else: 640 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 641 642 kwargs["exp_class"] = exp_class 643 if params: 644 kwargs["params"] = params 645 646 func = self.expression(**kwargs) 647 648 if isinstance(expr, exp.Window): 649 # The window's func was parsed as Anonymous in base parser, fix its 650 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 651 expr.set("this", func) 652 elif params: 653 # Params have blocked super()._parse_function() from parsing the following window 654 # (if that exists) as they're standing between the function call and the window spec 655 expr = self._parse_window(func) 656 else: 657 expr = func 658 659 return expr 660 661 def _parse_func_params( 662 self, this: t.Optional[exp.Func] = None 663 ) -> t.Optional[t.List[exp.Expression]]: 664 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 665 return self._parse_csv(self._parse_lambda) 666 667 if self._match(TokenType.L_PAREN): 668 params = self._parse_csv(self._parse_lambda) 669 self._match_r_paren(this) 670 return params 671 672 return None 673 674 def _parse_quantile(self) -> exp.Quantile: 675 this = self._parse_lambda() 676 params = self._parse_func_params() 677 if params: 678 return self.expression(exp.Quantile, this=params[0], quantile=this) 679 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 680 681 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 682 return super()._parse_wrapped_id_vars(optional=True) 683 684 def _parse_primary_key( 685 self, wrapped_optional: bool = False, in_props: bool = False 686 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 687 return super()._parse_primary_key( 688 wrapped_optional=wrapped_optional or in_props, in_props=in_props 689 ) 690 691 def _parse_on_property(self) -> t.Optional[exp.Expression]: 692 index = self._index 693 if self._match_text_seq("CLUSTER"): 694 this = self._parse_id_var() 695 if this: 696 return self.expression(exp.OnCluster, this=this) 697 else: 698 self._retreat(index) 699 return None 700 701 def _parse_index_constraint( 702 self, kind: t.Optional[str] = None 703 ) -> exp.IndexColumnConstraint: 704 # INDEX name1 expr TYPE type1(args) GRANULARITY value 705 this = self._parse_id_var() 706 expression = self._parse_assignment() 707 708 index_type = self._match_text_seq("TYPE") and ( 709 self._parse_function() or self._parse_var() 710 ) 711 712 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 713 714 return self.expression( 715 exp.IndexColumnConstraint, 716 this=this, 717 expression=expression, 718 index_type=index_type, 719 granularity=granularity, 720 ) 721 722 def _parse_partition(self) -> t.Optional[exp.Partition]: 723 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 724 if not self._match(TokenType.PARTITION): 725 return None 726 727 if self._match_text_seq("ID"): 728 # Corresponds to the PARTITION ID <string_value> syntax 729 expressions: t.List[exp.Expression] = [ 730 self.expression(exp.PartitionId, this=self._parse_string()) 731 ] 732 else: 733 expressions = self._parse_expressions() 734 735 return self.expression(exp.Partition, expressions=expressions) 736 737 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 738 partition = self._parse_partition() 739 740 if not partition or not self._match(TokenType.FROM): 741 return None 742 743 return self.expression( 744 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 745 ) 746 747 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 748 if not self._match_text_seq("PROJECTION"): 749 return None 750 751 return self.expression( 752 exp.ProjectionDef, 753 this=self._parse_id_var(), 754 expression=self._parse_wrapped(self._parse_statement), 755 ) 756 757 def _parse_constraint(self) -> t.Optional[exp.Expression]: 758 return super()._parse_constraint() or self._parse_projection_def() 759 760 def _parse_alias( 761 self, this: t.Optional[exp.Expression], explicit: bool = False 762 ) -> t.Optional[exp.Expression]: 763 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 764 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 765 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 766 return this 767 768 return super()._parse_alias(this=this, explicit=explicit) 769 770 def _parse_expression(self) -> t.Optional[exp.Expression]: 771 this = super()._parse_expression() 772 773 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 774 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 775 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 776 self._match(TokenType.R_PAREN) 777 778 return this 779 780 def _parse_columns(self) -> exp.Expression: 781 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 782 783 while self._next and self._match_text_seq(")", "APPLY", "("): 784 self._match(TokenType.R_PAREN) 785 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 786 return this 787 788 class Generator(generator.Generator): 789 QUERY_HINTS = False 790 STRUCT_DELIMITER = ("(", ")") 791 NVL2_SUPPORTED = False 792 TABLESAMPLE_REQUIRES_PARENS = False 793 TABLESAMPLE_SIZE_IS_ROWS = False 794 TABLESAMPLE_KEYWORDS = "SAMPLE" 795 LAST_DAY_SUPPORTS_DATE_PART = False 796 CAN_IMPLEMENT_ARRAY_ANY = True 797 SUPPORTS_TO_NUMBER = False 798 JOIN_HINTS = False 799 TABLE_HINTS = False 800 GROUPINGS_SEP = "" 801 SET_OP_MODIFIERS = False 802 SUPPORTS_TABLE_ALIAS_COLUMNS = False 803 VALUES_AS_TABLE = False 804 805 STRING_TYPE_MAPPING = { 806 exp.DataType.Type.CHAR: "String", 807 exp.DataType.Type.LONGBLOB: "String", 808 exp.DataType.Type.LONGTEXT: "String", 809 exp.DataType.Type.MEDIUMBLOB: "String", 810 exp.DataType.Type.MEDIUMTEXT: "String", 811 exp.DataType.Type.TINYBLOB: "String", 812 exp.DataType.Type.TINYTEXT: "String", 813 exp.DataType.Type.TEXT: "String", 814 exp.DataType.Type.VARBINARY: "String", 815 exp.DataType.Type.VARCHAR: "String", 816 } 817 818 SUPPORTED_JSON_PATH_PARTS = { 819 exp.JSONPathKey, 820 exp.JSONPathRoot, 821 exp.JSONPathSubscript, 822 } 823 824 TYPE_MAPPING = { 825 **generator.Generator.TYPE_MAPPING, 826 **STRING_TYPE_MAPPING, 827 exp.DataType.Type.ARRAY: "Array", 828 exp.DataType.Type.BIGINT: "Int64", 829 exp.DataType.Type.DATE32: "Date32", 830 exp.DataType.Type.DATETIME: "DateTime", 831 exp.DataType.Type.DATETIME64: "DateTime64", 832 exp.DataType.Type.TIMESTAMP: "DateTime", 833 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 834 exp.DataType.Type.DOUBLE: "Float64", 835 exp.DataType.Type.ENUM: "Enum", 836 exp.DataType.Type.ENUM8: "Enum8", 837 exp.DataType.Type.ENUM16: "Enum16", 838 exp.DataType.Type.FIXEDSTRING: "FixedString", 839 exp.DataType.Type.FLOAT: "Float32", 840 exp.DataType.Type.INT: "Int32", 841 exp.DataType.Type.MEDIUMINT: "Int32", 842 exp.DataType.Type.INT128: "Int128", 843 exp.DataType.Type.INT256: "Int256", 844 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 845 exp.DataType.Type.MAP: "Map", 846 exp.DataType.Type.NESTED: "Nested", 847 exp.DataType.Type.SMALLINT: "Int16", 848 exp.DataType.Type.STRUCT: "Tuple", 849 exp.DataType.Type.TINYINT: "Int8", 850 exp.DataType.Type.UBIGINT: "UInt64", 851 exp.DataType.Type.UINT: "UInt32", 852 exp.DataType.Type.UINT128: "UInt128", 853 exp.DataType.Type.UINT256: "UInt256", 854 exp.DataType.Type.USMALLINT: "UInt16", 855 exp.DataType.Type.UTINYINT: "UInt8", 856 exp.DataType.Type.IPV4: "IPv4", 857 exp.DataType.Type.IPV6: "IPv6", 858 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 859 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 860 } 861 862 TRANSFORMS = { 863 **generator.Generator.TRANSFORMS, 864 exp.AnyValue: rename_func("any"), 865 exp.ApproxDistinct: rename_func("uniq"), 866 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 867 exp.ArraySize: rename_func("LENGTH"), 868 exp.ArraySum: rename_func("arraySum"), 869 exp.ArgMax: arg_max_or_min_no_count("argMax"), 870 exp.ArgMin: arg_max_or_min_no_count("argMin"), 871 exp.Array: inline_array_sql, 872 exp.CastToStrType: rename_func("CAST"), 873 exp.CountIf: rename_func("countIf"), 874 exp.CompressColumnConstraint: lambda self, 875 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 876 exp.ComputedColumnConstraint: lambda self, 877 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 878 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 879 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 880 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 881 exp.DateStrToDate: rename_func("toDate"), 882 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 883 exp.Explode: rename_func("arrayJoin"), 884 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 885 exp.IsNan: rename_func("isNaN"), 886 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 887 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 888 exp.JSONPathKey: json_path_key_only_name, 889 exp.JSONPathRoot: lambda *_: "", 890 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 891 exp.Nullif: rename_func("nullIf"), 892 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 893 exp.Pivot: no_pivot_sql, 894 exp.Quantile: _quantile_sql, 895 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 896 exp.Rand: rename_func("randCanonical"), 897 exp.StartsWith: rename_func("startsWith"), 898 exp.StrPosition: lambda self, e: self.func( 899 "position", e.this, e.args.get("substr"), e.args.get("position") 900 ), 901 exp.TimeToStr: lambda self, e: self.func( 902 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 903 ), 904 exp.TimeStrToTime: _timestrtotime_sql, 905 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 906 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 907 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 908 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 909 exp.MD5Digest: rename_func("MD5"), 910 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 911 exp.SHA: rename_func("SHA1"), 912 exp.SHA2: sha256_sql, 913 exp.UnixToTime: _unix_to_time_sql, 914 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 915 exp.Trim: trim_sql, 916 exp.Variance: rename_func("varSamp"), 917 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 918 exp.Stddev: rename_func("stddevSamp"), 919 exp.Chr: rename_func("CHAR"), 920 exp.Lag: lambda self, e: self.func( 921 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 922 ), 923 exp.Lead: lambda self, e: self.func( 924 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 925 ), 926 } 927 928 PROPERTIES_LOCATION = { 929 **generator.Generator.PROPERTIES_LOCATION, 930 exp.OnCluster: exp.Properties.Location.POST_NAME, 931 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 932 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 933 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 934 } 935 936 # There's no list in docs, but it can be found in Clickhouse code 937 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 938 ON_CLUSTER_TARGETS = { 939 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 940 "DATABASE", 941 "TABLE", 942 "VIEW", 943 "DICTIONARY", 944 "INDEX", 945 "FUNCTION", 946 "NAMED COLLECTION", 947 } 948 949 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 950 NON_NULLABLE_TYPES = { 951 exp.DataType.Type.ARRAY, 952 exp.DataType.Type.MAP, 953 exp.DataType.Type.STRUCT, 954 } 955 956 def strtodate_sql(self, expression: exp.StrToDate) -> str: 957 strtodate_sql = self.function_fallback_sql(expression) 958 959 if not isinstance(expression.parent, exp.Cast): 960 # StrToDate returns DATEs in other dialects (eg. postgres), so 961 # this branch aims to improve the transpilation to clickhouse 962 return f"CAST({strtodate_sql} AS DATE)" 963 964 return strtodate_sql 965 966 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 967 this = expression.this 968 969 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 970 return self.sql(this) 971 972 return super().cast_sql(expression, safe_prefix=safe_prefix) 973 974 def trycast_sql(self, expression: exp.TryCast) -> str: 975 dtype = expression.to 976 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 977 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 978 dtype.set("nullable", True) 979 980 return super().cast_sql(expression) 981 982 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 983 this = self.json_path_part(expression.this) 984 return str(int(this) + 1) if is_int(this) else this 985 986 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 987 return f"AS {self.sql(expression, 'this')}" 988 989 def _any_to_has( 990 self, 991 expression: exp.EQ | exp.NEQ, 992 default: t.Callable[[t.Any], str], 993 prefix: str = "", 994 ) -> str: 995 if isinstance(expression.left, exp.Any): 996 arr = expression.left 997 this = expression.right 998 elif isinstance(expression.right, exp.Any): 999 arr = expression.right 1000 this = expression.left 1001 else: 1002 return default(expression) 1003 1004 return prefix + self.func("has", arr.this.unnest(), this) 1005 1006 def eq_sql(self, expression: exp.EQ) -> str: 1007 return self._any_to_has(expression, super().eq_sql) 1008 1009 def neq_sql(self, expression: exp.NEQ) -> str: 1010 return self._any_to_has(expression, super().neq_sql, "NOT ") 1011 1012 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1013 # Manually add a flag to make the search case-insensitive 1014 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1015 return self.func("match", expression.this, regex) 1016 1017 def datatype_sql(self, expression: exp.DataType) -> str: 1018 # String is the standard ClickHouse type, every other variant is just an alias. 1019 # Additionally, any supplied length parameter will be ignored. 1020 # 1021 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1022 if expression.this in self.STRING_TYPE_MAPPING: 1023 dtype = "String" 1024 else: 1025 dtype = super().datatype_sql(expression) 1026 1027 # This section changes the type to `Nullable(...)` if the following conditions hold: 1028 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1029 # and change their semantics 1030 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1031 # constraint: "Type of Map key must be a type, that can be represented by integer or 1032 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1033 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1034 parent = expression.parent 1035 nullable = expression.args.get("nullable") 1036 if nullable is True or ( 1037 nullable is None 1038 and not ( 1039 isinstance(parent, exp.DataType) 1040 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1041 and expression.index in (None, 0) 1042 ) 1043 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1044 ): 1045 dtype = f"Nullable({dtype})" 1046 1047 return dtype 1048 1049 def cte_sql(self, expression: exp.CTE) -> str: 1050 if expression.args.get("scalar"): 1051 this = self.sql(expression, "this") 1052 alias = self.sql(expression, "alias") 1053 return f"{this} AS {alias}" 1054 1055 return super().cte_sql(expression) 1056 1057 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1058 return super().after_limit_modifiers(expression) + [ 1059 ( 1060 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1061 if expression.args.get("settings") 1062 else "" 1063 ), 1064 ( 1065 self.seg("FORMAT ") + self.sql(expression, "format") 1066 if expression.args.get("format") 1067 else "" 1068 ), 1069 ] 1070 1071 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1072 params = self.expressions(expression, key="params", flat=True) 1073 return self.func(expression.name, *expression.expressions) + f"({params})" 1074 1075 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1076 return self.func(expression.name, *expression.expressions) 1077 1078 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1079 return self.anonymousaggfunc_sql(expression) 1080 1081 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1082 return self.parameterizedagg_sql(expression) 1083 1084 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1085 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1086 1087 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1088 return f"ON CLUSTER {self.sql(expression, 'this')}" 1089 1090 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1091 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1092 exp.Properties.Location.POST_NAME 1093 ): 1094 this_name = self.sql( 1095 expression.this if isinstance(expression.this, exp.Schema) else expression, 1096 "this", 1097 ) 1098 this_properties = " ".join( 1099 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1100 ) 1101 this_schema = self.schema_columns_sql(expression.this) 1102 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1103 1104 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1105 1106 return super().createable_sql(expression, locations) 1107 1108 def create_sql(self, expression: exp.Create) -> str: 1109 # The comment property comes last in CTAS statements, i.e. after the query 1110 query = expression.expression 1111 if isinstance(query, exp.Query): 1112 comment_prop = expression.find(exp.SchemaCommentProperty) 1113 if comment_prop: 1114 comment_prop.pop() 1115 query.replace(exp.paren(query)) 1116 else: 1117 comment_prop = None 1118 1119 create_sql = super().create_sql(expression) 1120 1121 comment_sql = self.sql(comment_prop) 1122 comment_sql = f" {comment_sql}" if comment_sql else "" 1123 1124 return f"{create_sql}{comment_sql}" 1125 1126 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1127 this = self.indent(self.sql(expression, "this")) 1128 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1129 1130 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1131 this = self.sql(expression, "this") 1132 this = f" {this}" if this else "" 1133 expr = self.sql(expression, "expression") 1134 expr = f" {expr}" if expr else "" 1135 index_type = self.sql(expression, "index_type") 1136 index_type = f" TYPE {index_type}" if index_type else "" 1137 granularity = self.sql(expression, "granularity") 1138 granularity = f" GRANULARITY {granularity}" if granularity else "" 1139 1140 return f"INDEX{this}{expr}{index_type}{granularity}" 1141 1142 def partition_sql(self, expression: exp.Partition) -> str: 1143 return f"PARTITION {self.expressions(expression, flat=True)}" 1144 1145 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1146 return f"ID {self.sql(expression.this)}" 1147 1148 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1149 return ( 1150 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1151 ) 1152 1153 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1154 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
133class ClickHouse(Dialect): 134 NORMALIZE_FUNCTIONS: bool | str = False 135 NULL_ORDERING = "nulls_are_last" 136 SUPPORTS_USER_DEFINED_TYPES = False 137 SAFE_DIVISION = True 138 LOG_BASE_FIRST: t.Optional[bool] = None 139 FORCE_EARLY_ALIAS_REF_EXPANSION = True 140 141 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 142 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 143 144 UNESCAPED_SEQUENCES = { 145 "\\0": "\0", 146 } 147 148 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 149 150 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 151 exp.Except: False, 152 exp.Intersect: False, 153 exp.Union: None, 154 } 155 156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 } 202 203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 "COLUMNS": lambda self: self._parse_columns(), 401 } 402 403 FUNCTION_PARSERS.pop("MATCH") 404 405 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 406 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 407 408 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 409 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 410 411 RANGE_PARSERS = { 412 **parser.Parser.RANGE_PARSERS, 413 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 414 and self._parse_in(this, is_global=True), 415 } 416 417 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 418 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 419 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 420 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 421 422 JOIN_KINDS = { 423 *parser.Parser.JOIN_KINDS, 424 TokenType.ANY, 425 TokenType.ASOF, 426 TokenType.ARRAY, 427 } 428 429 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 430 TokenType.ANY, 431 TokenType.ARRAY, 432 TokenType.FINAL, 433 TokenType.FORMAT, 434 TokenType.SETTINGS, 435 } 436 437 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 438 TokenType.FORMAT, 439 } 440 441 LOG_DEFAULTS_TO_LN = True 442 443 QUERY_MODIFIER_PARSERS = { 444 **parser.Parser.QUERY_MODIFIER_PARSERS, 445 TokenType.SETTINGS: lambda self: ( 446 "settings", 447 self._advance() or self._parse_csv(self._parse_assignment), 448 ), 449 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 450 } 451 452 CONSTRAINT_PARSERS = { 453 **parser.Parser.CONSTRAINT_PARSERS, 454 "INDEX": lambda self: self._parse_index_constraint(), 455 "CODEC": lambda self: self._parse_compress(), 456 } 457 458 ALTER_PARSERS = { 459 **parser.Parser.ALTER_PARSERS, 460 "REPLACE": lambda self: self._parse_alter_table_replace(), 461 } 462 463 SCHEMA_UNNAMED_CONSTRAINTS = { 464 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 465 "INDEX", 466 } 467 468 PLACEHOLDER_PARSERS = { 469 **parser.Parser.PLACEHOLDER_PARSERS, 470 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 471 } 472 473 def _parse_types( 474 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 475 ) -> t.Optional[exp.Expression]: 476 dtype = super()._parse_types( 477 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 478 ) 479 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 480 # Mark every type as non-nullable which is ClickHouse's default, unless it's 481 # already marked as nullable. This marker helps us transpile types from other 482 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 483 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 484 # fail in ClickHouse without the `Nullable` type constructor. 485 dtype.set("nullable", False) 486 487 return dtype 488 489 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 490 index = self._index 491 this = self._parse_bitwise() 492 if self._match(TokenType.FROM): 493 self._retreat(index) 494 return super()._parse_extract() 495 496 # We return Anonymous here because extract and regexpExtract have different semantics, 497 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 498 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 499 # 500 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 501 self._match(TokenType.COMMA) 502 return self.expression( 503 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 504 ) 505 506 def _parse_assignment(self) -> t.Optional[exp.Expression]: 507 this = super()._parse_assignment() 508 509 if self._match(TokenType.PLACEHOLDER): 510 return self.expression( 511 exp.If, 512 this=this, 513 true=self._parse_assignment(), 514 false=self._match(TokenType.COLON) and self._parse_assignment(), 515 ) 516 517 return this 518 519 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 520 """ 521 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 522 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 523 """ 524 this = self._parse_id_var() 525 self._match(TokenType.COLON) 526 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 527 self._match_text_seq("IDENTIFIER") and "Identifier" 528 ) 529 530 if not kind: 531 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 532 elif not self._match(TokenType.R_BRACE): 533 self.raise_error("Expecting }") 534 535 return self.expression(exp.Placeholder, this=this, kind=kind) 536 537 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 538 this = super()._parse_in(this) 539 this.set("is_global", is_global) 540 return this 541 542 def _parse_table( 543 self, 544 schema: bool = False, 545 joins: bool = False, 546 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 547 parse_bracket: bool = False, 548 is_db_reference: bool = False, 549 parse_partition: bool = False, 550 ) -> t.Optional[exp.Expression]: 551 this = super()._parse_table( 552 schema=schema, 553 joins=joins, 554 alias_tokens=alias_tokens, 555 parse_bracket=parse_bracket, 556 is_db_reference=is_db_reference, 557 ) 558 559 if self._match(TokenType.FINAL): 560 this = self.expression(exp.Final, this=this) 561 562 return this 563 564 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 565 return super()._parse_position(haystack_first=True) 566 567 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 568 def _parse_cte(self) -> exp.CTE: 569 # WITH <identifier> AS <subquery expression> 570 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 571 572 if not cte: 573 # WITH <expression> AS <identifier> 574 cte = self.expression( 575 exp.CTE, 576 this=self._parse_assignment(), 577 alias=self._parse_table_alias(), 578 scalar=True, 579 ) 580 581 return cte 582 583 def _parse_join_parts( 584 self, 585 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 586 is_global = self._match(TokenType.GLOBAL) and self._prev 587 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 588 589 if kind_pre: 590 kind = self._match_set(self.JOIN_KINDS) and self._prev 591 side = self._match_set(self.JOIN_SIDES) and self._prev 592 return is_global, side, kind 593 594 return ( 595 is_global, 596 self._match_set(self.JOIN_SIDES) and self._prev, 597 self._match_set(self.JOIN_KINDS) and self._prev, 598 ) 599 600 def _parse_join( 601 self, skip_join_token: bool = False, parse_bracket: bool = False 602 ) -> t.Optional[exp.Join]: 603 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 604 if join: 605 join.set("global", join.args.pop("method", None)) 606 607 return join 608 609 def _parse_function( 610 self, 611 functions: t.Optional[t.Dict[str, t.Callable]] = None, 612 anonymous: bool = False, 613 optional_parens: bool = True, 614 any_token: bool = False, 615 ) -> t.Optional[exp.Expression]: 616 expr = super()._parse_function( 617 functions=functions, 618 anonymous=anonymous, 619 optional_parens=optional_parens, 620 any_token=any_token, 621 ) 622 623 func = expr.this if isinstance(expr, exp.Window) else expr 624 625 # Aggregate functions can be split in 2 parts: <func_name><suffix> 626 parts = ( 627 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 628 ) 629 630 if parts: 631 params = self._parse_func_params(func) 632 633 kwargs = { 634 "this": func.this, 635 "expressions": func.expressions, 636 } 637 if parts[1]: 638 kwargs["parts"] = parts 639 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 640 else: 641 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 642 643 kwargs["exp_class"] = exp_class 644 if params: 645 kwargs["params"] = params 646 647 func = self.expression(**kwargs) 648 649 if isinstance(expr, exp.Window): 650 # The window's func was parsed as Anonymous in base parser, fix its 651 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 652 expr.set("this", func) 653 elif params: 654 # Params have blocked super()._parse_function() from parsing the following window 655 # (if that exists) as they're standing between the function call and the window spec 656 expr = self._parse_window(func) 657 else: 658 expr = func 659 660 return expr 661 662 def _parse_func_params( 663 self, this: t.Optional[exp.Func] = None 664 ) -> t.Optional[t.List[exp.Expression]]: 665 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 666 return self._parse_csv(self._parse_lambda) 667 668 if self._match(TokenType.L_PAREN): 669 params = self._parse_csv(self._parse_lambda) 670 self._match_r_paren(this) 671 return params 672 673 return None 674 675 def _parse_quantile(self) -> exp.Quantile: 676 this = self._parse_lambda() 677 params = self._parse_func_params() 678 if params: 679 return self.expression(exp.Quantile, this=params[0], quantile=this) 680 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 681 682 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 683 return super()._parse_wrapped_id_vars(optional=True) 684 685 def _parse_primary_key( 686 self, wrapped_optional: bool = False, in_props: bool = False 687 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 688 return super()._parse_primary_key( 689 wrapped_optional=wrapped_optional or in_props, in_props=in_props 690 ) 691 692 def _parse_on_property(self) -> t.Optional[exp.Expression]: 693 index = self._index 694 if self._match_text_seq("CLUSTER"): 695 this = self._parse_id_var() 696 if this: 697 return self.expression(exp.OnCluster, this=this) 698 else: 699 self._retreat(index) 700 return None 701 702 def _parse_index_constraint( 703 self, kind: t.Optional[str] = None 704 ) -> exp.IndexColumnConstraint: 705 # INDEX name1 expr TYPE type1(args) GRANULARITY value 706 this = self._parse_id_var() 707 expression = self._parse_assignment() 708 709 index_type = self._match_text_seq("TYPE") and ( 710 self._parse_function() or self._parse_var() 711 ) 712 713 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 714 715 return self.expression( 716 exp.IndexColumnConstraint, 717 this=this, 718 expression=expression, 719 index_type=index_type, 720 granularity=granularity, 721 ) 722 723 def _parse_partition(self) -> t.Optional[exp.Partition]: 724 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 725 if not self._match(TokenType.PARTITION): 726 return None 727 728 if self._match_text_seq("ID"): 729 # Corresponds to the PARTITION ID <string_value> syntax 730 expressions: t.List[exp.Expression] = [ 731 self.expression(exp.PartitionId, this=self._parse_string()) 732 ] 733 else: 734 expressions = self._parse_expressions() 735 736 return self.expression(exp.Partition, expressions=expressions) 737 738 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 739 partition = self._parse_partition() 740 741 if not partition or not self._match(TokenType.FROM): 742 return None 743 744 return self.expression( 745 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 746 ) 747 748 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 749 if not self._match_text_seq("PROJECTION"): 750 return None 751 752 return self.expression( 753 exp.ProjectionDef, 754 this=self._parse_id_var(), 755 expression=self._parse_wrapped(self._parse_statement), 756 ) 757 758 def _parse_constraint(self) -> t.Optional[exp.Expression]: 759 return super()._parse_constraint() or self._parse_projection_def() 760 761 def _parse_alias( 762 self, this: t.Optional[exp.Expression], explicit: bool = False 763 ) -> t.Optional[exp.Expression]: 764 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 765 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 766 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 767 return this 768 769 return super()._parse_alias(this=this, explicit=explicit) 770 771 def _parse_expression(self) -> t.Optional[exp.Expression]: 772 this = super()._parse_expression() 773 774 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 775 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 776 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 777 self._match(TokenType.R_PAREN) 778 779 return this 780 781 def _parse_columns(self) -> exp.Expression: 782 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 783 784 while self._next and self._match_text_seq(")", "APPLY", "("): 785 self._match(TokenType.R_PAREN) 786 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 787 return this 788 789 class Generator(generator.Generator): 790 QUERY_HINTS = False 791 STRUCT_DELIMITER = ("(", ")") 792 NVL2_SUPPORTED = False 793 TABLESAMPLE_REQUIRES_PARENS = False 794 TABLESAMPLE_SIZE_IS_ROWS = False 795 TABLESAMPLE_KEYWORDS = "SAMPLE" 796 LAST_DAY_SUPPORTS_DATE_PART = False 797 CAN_IMPLEMENT_ARRAY_ANY = True 798 SUPPORTS_TO_NUMBER = False 799 JOIN_HINTS = False 800 TABLE_HINTS = False 801 GROUPINGS_SEP = "" 802 SET_OP_MODIFIERS = False 803 SUPPORTS_TABLE_ALIAS_COLUMNS = False 804 VALUES_AS_TABLE = False 805 806 STRING_TYPE_MAPPING = { 807 exp.DataType.Type.CHAR: "String", 808 exp.DataType.Type.LONGBLOB: "String", 809 exp.DataType.Type.LONGTEXT: "String", 810 exp.DataType.Type.MEDIUMBLOB: "String", 811 exp.DataType.Type.MEDIUMTEXT: "String", 812 exp.DataType.Type.TINYBLOB: "String", 813 exp.DataType.Type.TINYTEXT: "String", 814 exp.DataType.Type.TEXT: "String", 815 exp.DataType.Type.VARBINARY: "String", 816 exp.DataType.Type.VARCHAR: "String", 817 } 818 819 SUPPORTED_JSON_PATH_PARTS = { 820 exp.JSONPathKey, 821 exp.JSONPathRoot, 822 exp.JSONPathSubscript, 823 } 824 825 TYPE_MAPPING = { 826 **generator.Generator.TYPE_MAPPING, 827 **STRING_TYPE_MAPPING, 828 exp.DataType.Type.ARRAY: "Array", 829 exp.DataType.Type.BIGINT: "Int64", 830 exp.DataType.Type.DATE32: "Date32", 831 exp.DataType.Type.DATETIME: "DateTime", 832 exp.DataType.Type.DATETIME64: "DateTime64", 833 exp.DataType.Type.TIMESTAMP: "DateTime", 834 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 835 exp.DataType.Type.DOUBLE: "Float64", 836 exp.DataType.Type.ENUM: "Enum", 837 exp.DataType.Type.ENUM8: "Enum8", 838 exp.DataType.Type.ENUM16: "Enum16", 839 exp.DataType.Type.FIXEDSTRING: "FixedString", 840 exp.DataType.Type.FLOAT: "Float32", 841 exp.DataType.Type.INT: "Int32", 842 exp.DataType.Type.MEDIUMINT: "Int32", 843 exp.DataType.Type.INT128: "Int128", 844 exp.DataType.Type.INT256: "Int256", 845 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 846 exp.DataType.Type.MAP: "Map", 847 exp.DataType.Type.NESTED: "Nested", 848 exp.DataType.Type.SMALLINT: "Int16", 849 exp.DataType.Type.STRUCT: "Tuple", 850 exp.DataType.Type.TINYINT: "Int8", 851 exp.DataType.Type.UBIGINT: "UInt64", 852 exp.DataType.Type.UINT: "UInt32", 853 exp.DataType.Type.UINT128: "UInt128", 854 exp.DataType.Type.UINT256: "UInt256", 855 exp.DataType.Type.USMALLINT: "UInt16", 856 exp.DataType.Type.UTINYINT: "UInt8", 857 exp.DataType.Type.IPV4: "IPv4", 858 exp.DataType.Type.IPV6: "IPv6", 859 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 860 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 861 } 862 863 TRANSFORMS = { 864 **generator.Generator.TRANSFORMS, 865 exp.AnyValue: rename_func("any"), 866 exp.ApproxDistinct: rename_func("uniq"), 867 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 868 exp.ArraySize: rename_func("LENGTH"), 869 exp.ArraySum: rename_func("arraySum"), 870 exp.ArgMax: arg_max_or_min_no_count("argMax"), 871 exp.ArgMin: arg_max_or_min_no_count("argMin"), 872 exp.Array: inline_array_sql, 873 exp.CastToStrType: rename_func("CAST"), 874 exp.CountIf: rename_func("countIf"), 875 exp.CompressColumnConstraint: lambda self, 876 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 877 exp.ComputedColumnConstraint: lambda self, 878 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 879 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 880 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 881 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 882 exp.DateStrToDate: rename_func("toDate"), 883 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 884 exp.Explode: rename_func("arrayJoin"), 885 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 886 exp.IsNan: rename_func("isNaN"), 887 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 888 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 889 exp.JSONPathKey: json_path_key_only_name, 890 exp.JSONPathRoot: lambda *_: "", 891 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 892 exp.Nullif: rename_func("nullIf"), 893 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 894 exp.Pivot: no_pivot_sql, 895 exp.Quantile: _quantile_sql, 896 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 897 exp.Rand: rename_func("randCanonical"), 898 exp.StartsWith: rename_func("startsWith"), 899 exp.StrPosition: lambda self, e: self.func( 900 "position", e.this, e.args.get("substr"), e.args.get("position") 901 ), 902 exp.TimeToStr: lambda self, e: self.func( 903 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 904 ), 905 exp.TimeStrToTime: _timestrtotime_sql, 906 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 907 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 908 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 909 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 910 exp.MD5Digest: rename_func("MD5"), 911 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 912 exp.SHA: rename_func("SHA1"), 913 exp.SHA2: sha256_sql, 914 exp.UnixToTime: _unix_to_time_sql, 915 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 916 exp.Trim: trim_sql, 917 exp.Variance: rename_func("varSamp"), 918 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 919 exp.Stddev: rename_func("stddevSamp"), 920 exp.Chr: rename_func("CHAR"), 921 exp.Lag: lambda self, e: self.func( 922 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 923 ), 924 exp.Lead: lambda self, e: self.func( 925 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 926 ), 927 } 928 929 PROPERTIES_LOCATION = { 930 **generator.Generator.PROPERTIES_LOCATION, 931 exp.OnCluster: exp.Properties.Location.POST_NAME, 932 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 933 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 934 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 935 } 936 937 # There's no list in docs, but it can be found in Clickhouse code 938 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 939 ON_CLUSTER_TARGETS = { 940 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 941 "DATABASE", 942 "TABLE", 943 "VIEW", 944 "DICTIONARY", 945 "INDEX", 946 "FUNCTION", 947 "NAMED COLLECTION", 948 } 949 950 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 951 NON_NULLABLE_TYPES = { 952 exp.DataType.Type.ARRAY, 953 exp.DataType.Type.MAP, 954 exp.DataType.Type.STRUCT, 955 } 956 957 def strtodate_sql(self, expression: exp.StrToDate) -> str: 958 strtodate_sql = self.function_fallback_sql(expression) 959 960 if not isinstance(expression.parent, exp.Cast): 961 # StrToDate returns DATEs in other dialects (eg. postgres), so 962 # this branch aims to improve the transpilation to clickhouse 963 return f"CAST({strtodate_sql} AS DATE)" 964 965 return strtodate_sql 966 967 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 968 this = expression.this 969 970 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 971 return self.sql(this) 972 973 return super().cast_sql(expression, safe_prefix=safe_prefix) 974 975 def trycast_sql(self, expression: exp.TryCast) -> str: 976 dtype = expression.to 977 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 978 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 979 dtype.set("nullable", True) 980 981 return super().cast_sql(expression) 982 983 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 984 this = self.json_path_part(expression.this) 985 return str(int(this) + 1) if is_int(this) else this 986 987 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 988 return f"AS {self.sql(expression, 'this')}" 989 990 def _any_to_has( 991 self, 992 expression: exp.EQ | exp.NEQ, 993 default: t.Callable[[t.Any], str], 994 prefix: str = "", 995 ) -> str: 996 if isinstance(expression.left, exp.Any): 997 arr = expression.left 998 this = expression.right 999 elif isinstance(expression.right, exp.Any): 1000 arr = expression.right 1001 this = expression.left 1002 else: 1003 return default(expression) 1004 1005 return prefix + self.func("has", arr.this.unnest(), this) 1006 1007 def eq_sql(self, expression: exp.EQ) -> str: 1008 return self._any_to_has(expression, super().eq_sql) 1009 1010 def neq_sql(self, expression: exp.NEQ) -> str: 1011 return self._any_to_has(expression, super().neq_sql, "NOT ") 1012 1013 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1014 # Manually add a flag to make the search case-insensitive 1015 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1016 return self.func("match", expression.this, regex) 1017 1018 def datatype_sql(self, expression: exp.DataType) -> str: 1019 # String is the standard ClickHouse type, every other variant is just an alias. 1020 # Additionally, any supplied length parameter will be ignored. 1021 # 1022 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1023 if expression.this in self.STRING_TYPE_MAPPING: 1024 dtype = "String" 1025 else: 1026 dtype = super().datatype_sql(expression) 1027 1028 # This section changes the type to `Nullable(...)` if the following conditions hold: 1029 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1030 # and change their semantics 1031 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1032 # constraint: "Type of Map key must be a type, that can be represented by integer or 1033 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1034 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1035 parent = expression.parent 1036 nullable = expression.args.get("nullable") 1037 if nullable is True or ( 1038 nullable is None 1039 and not ( 1040 isinstance(parent, exp.DataType) 1041 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1042 and expression.index in (None, 0) 1043 ) 1044 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1045 ): 1046 dtype = f"Nullable({dtype})" 1047 1048 return dtype 1049 1050 def cte_sql(self, expression: exp.CTE) -> str: 1051 if expression.args.get("scalar"): 1052 this = self.sql(expression, "this") 1053 alias = self.sql(expression, "alias") 1054 return f"{this} AS {alias}" 1055 1056 return super().cte_sql(expression) 1057 1058 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1059 return super().after_limit_modifiers(expression) + [ 1060 ( 1061 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1062 if expression.args.get("settings") 1063 else "" 1064 ), 1065 ( 1066 self.seg("FORMAT ") + self.sql(expression, "format") 1067 if expression.args.get("format") 1068 else "" 1069 ), 1070 ] 1071 1072 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1073 params = self.expressions(expression, key="params", flat=True) 1074 return self.func(expression.name, *expression.expressions) + f"({params})" 1075 1076 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1077 return self.func(expression.name, *expression.expressions) 1078 1079 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1080 return self.anonymousaggfunc_sql(expression) 1081 1082 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1083 return self.parameterizedagg_sql(expression) 1084 1085 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1086 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1087 1088 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1089 return f"ON CLUSTER {self.sql(expression, 'this')}" 1090 1091 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1092 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1093 exp.Properties.Location.POST_NAME 1094 ): 1095 this_name = self.sql( 1096 expression.this if isinstance(expression.this, exp.Schema) else expression, 1097 "this", 1098 ) 1099 this_properties = " ".join( 1100 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1101 ) 1102 this_schema = self.schema_columns_sql(expression.this) 1103 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1104 1105 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1106 1107 return super().createable_sql(expression, locations) 1108 1109 def create_sql(self, expression: exp.Create) -> str: 1110 # The comment property comes last in CTAS statements, i.e. after the query 1111 query = expression.expression 1112 if isinstance(query, exp.Query): 1113 comment_prop = expression.find(exp.SchemaCommentProperty) 1114 if comment_prop: 1115 comment_prop.pop() 1116 query.replace(exp.paren(query)) 1117 else: 1118 comment_prop = None 1119 1120 create_sql = super().create_sql(expression) 1121 1122 comment_sql = self.sql(comment_prop) 1123 comment_sql = f" {comment_sql}" if comment_sql else "" 1124 1125 return f"{create_sql}{comment_sql}" 1126 1127 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1128 this = self.indent(self.sql(expression, "this")) 1129 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1130 1131 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1132 this = self.sql(expression, "this") 1133 this = f" {this}" if this else "" 1134 expr = self.sql(expression, "expression") 1135 expr = f" {expr}" if expr else "" 1136 index_type = self.sql(expression, "index_type") 1137 index_type = f" TYPE {index_type}" if index_type else "" 1138 granularity = self.sql(expression, "granularity") 1139 granularity = f" GRANULARITY {granularity}" if granularity else "" 1140 1141 return f"INDEX{this}{expr}{index_type}{granularity}" 1142 1143 def partition_sql(self, expression: exp.Partition) -> str: 1144 return f"PARTITION {self.expressions(expression, flat=True)}" 1145 1146 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1147 return f"ID {self.sql(expression.this)}" 1148 1149 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1150 return ( 1151 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1152 ) 1153 1154 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1155 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 "COLUMNS": lambda self: self._parse_columns(), 401 } 402 403 FUNCTION_PARSERS.pop("MATCH") 404 405 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 406 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 407 408 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 409 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 410 411 RANGE_PARSERS = { 412 **parser.Parser.RANGE_PARSERS, 413 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 414 and self._parse_in(this, is_global=True), 415 } 416 417 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 418 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 419 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 420 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 421 422 JOIN_KINDS = { 423 *parser.Parser.JOIN_KINDS, 424 TokenType.ANY, 425 TokenType.ASOF, 426 TokenType.ARRAY, 427 } 428 429 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 430 TokenType.ANY, 431 TokenType.ARRAY, 432 TokenType.FINAL, 433 TokenType.FORMAT, 434 TokenType.SETTINGS, 435 } 436 437 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 438 TokenType.FORMAT, 439 } 440 441 LOG_DEFAULTS_TO_LN = True 442 443 QUERY_MODIFIER_PARSERS = { 444 **parser.Parser.QUERY_MODIFIER_PARSERS, 445 TokenType.SETTINGS: lambda self: ( 446 "settings", 447 self._advance() or self._parse_csv(self._parse_assignment), 448 ), 449 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 450 } 451 452 CONSTRAINT_PARSERS = { 453 **parser.Parser.CONSTRAINT_PARSERS, 454 "INDEX": lambda self: self._parse_index_constraint(), 455 "CODEC": lambda self: self._parse_compress(), 456 } 457 458 ALTER_PARSERS = { 459 **parser.Parser.ALTER_PARSERS, 460 "REPLACE": lambda self: self._parse_alter_table_replace(), 461 } 462 463 SCHEMA_UNNAMED_CONSTRAINTS = { 464 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 465 "INDEX", 466 } 467 468 PLACEHOLDER_PARSERS = { 469 **parser.Parser.PLACEHOLDER_PARSERS, 470 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 471 } 472 473 def _parse_types( 474 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 475 ) -> t.Optional[exp.Expression]: 476 dtype = super()._parse_types( 477 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 478 ) 479 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 480 # Mark every type as non-nullable which is ClickHouse's default, unless it's 481 # already marked as nullable. This marker helps us transpile types from other 482 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 483 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 484 # fail in ClickHouse without the `Nullable` type constructor. 485 dtype.set("nullable", False) 486 487 return dtype 488 489 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 490 index = self._index 491 this = self._parse_bitwise() 492 if self._match(TokenType.FROM): 493 self._retreat(index) 494 return super()._parse_extract() 495 496 # We return Anonymous here because extract and regexpExtract have different semantics, 497 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 498 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 499 # 500 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 501 self._match(TokenType.COMMA) 502 return self.expression( 503 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 504 ) 505 506 def _parse_assignment(self) -> t.Optional[exp.Expression]: 507 this = super()._parse_assignment() 508 509 if self._match(TokenType.PLACEHOLDER): 510 return self.expression( 511 exp.If, 512 this=this, 513 true=self._parse_assignment(), 514 false=self._match(TokenType.COLON) and self._parse_assignment(), 515 ) 516 517 return this 518 519 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 520 """ 521 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 522 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 523 """ 524 this = self._parse_id_var() 525 self._match(TokenType.COLON) 526 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 527 self._match_text_seq("IDENTIFIER") and "Identifier" 528 ) 529 530 if not kind: 531 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 532 elif not self._match(TokenType.R_BRACE): 533 self.raise_error("Expecting }") 534 535 return self.expression(exp.Placeholder, this=this, kind=kind) 536 537 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 538 this = super()._parse_in(this) 539 this.set("is_global", is_global) 540 return this 541 542 def _parse_table( 543 self, 544 schema: bool = False, 545 joins: bool = False, 546 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 547 parse_bracket: bool = False, 548 is_db_reference: bool = False, 549 parse_partition: bool = False, 550 ) -> t.Optional[exp.Expression]: 551 this = super()._parse_table( 552 schema=schema, 553 joins=joins, 554 alias_tokens=alias_tokens, 555 parse_bracket=parse_bracket, 556 is_db_reference=is_db_reference, 557 ) 558 559 if self._match(TokenType.FINAL): 560 this = self.expression(exp.Final, this=this) 561 562 return this 563 564 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 565 return super()._parse_position(haystack_first=True) 566 567 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 568 def _parse_cte(self) -> exp.CTE: 569 # WITH <identifier> AS <subquery expression> 570 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 571 572 if not cte: 573 # WITH <expression> AS <identifier> 574 cte = self.expression( 575 exp.CTE, 576 this=self._parse_assignment(), 577 alias=self._parse_table_alias(), 578 scalar=True, 579 ) 580 581 return cte 582 583 def _parse_join_parts( 584 self, 585 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 586 is_global = self._match(TokenType.GLOBAL) and self._prev 587 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 588 589 if kind_pre: 590 kind = self._match_set(self.JOIN_KINDS) and self._prev 591 side = self._match_set(self.JOIN_SIDES) and self._prev 592 return is_global, side, kind 593 594 return ( 595 is_global, 596 self._match_set(self.JOIN_SIDES) and self._prev, 597 self._match_set(self.JOIN_KINDS) and self._prev, 598 ) 599 600 def _parse_join( 601 self, skip_join_token: bool = False, parse_bracket: bool = False 602 ) -> t.Optional[exp.Join]: 603 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 604 if join: 605 join.set("global", join.args.pop("method", None)) 606 607 return join 608 609 def _parse_function( 610 self, 611 functions: t.Optional[t.Dict[str, t.Callable]] = None, 612 anonymous: bool = False, 613 optional_parens: bool = True, 614 any_token: bool = False, 615 ) -> t.Optional[exp.Expression]: 616 expr = super()._parse_function( 617 functions=functions, 618 anonymous=anonymous, 619 optional_parens=optional_parens, 620 any_token=any_token, 621 ) 622 623 func = expr.this if isinstance(expr, exp.Window) else expr 624 625 # Aggregate functions can be split in 2 parts: <func_name><suffix> 626 parts = ( 627 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 628 ) 629 630 if parts: 631 params = self._parse_func_params(func) 632 633 kwargs = { 634 "this": func.this, 635 "expressions": func.expressions, 636 } 637 if parts[1]: 638 kwargs["parts"] = parts 639 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 640 else: 641 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 642 643 kwargs["exp_class"] = exp_class 644 if params: 645 kwargs["params"] = params 646 647 func = self.expression(**kwargs) 648 649 if isinstance(expr, exp.Window): 650 # The window's func was parsed as Anonymous in base parser, fix its 651 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 652 expr.set("this", func) 653 elif params: 654 # Params have blocked super()._parse_function() from parsing the following window 655 # (if that exists) as they're standing between the function call and the window spec 656 expr = self._parse_window(func) 657 else: 658 expr = func 659 660 return expr 661 662 def _parse_func_params( 663 self, this: t.Optional[exp.Func] = None 664 ) -> t.Optional[t.List[exp.Expression]]: 665 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 666 return self._parse_csv(self._parse_lambda) 667 668 if self._match(TokenType.L_PAREN): 669 params = self._parse_csv(self._parse_lambda) 670 self._match_r_paren(this) 671 return params 672 673 return None 674 675 def _parse_quantile(self) -> exp.Quantile: 676 this = self._parse_lambda() 677 params = self._parse_func_params() 678 if params: 679 return self.expression(exp.Quantile, this=params[0], quantile=this) 680 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 681 682 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 683 return super()._parse_wrapped_id_vars(optional=True) 684 685 def _parse_primary_key( 686 self, wrapped_optional: bool = False, in_props: bool = False 687 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 688 return super()._parse_primary_key( 689 wrapped_optional=wrapped_optional or in_props, in_props=in_props 690 ) 691 692 def _parse_on_property(self) -> t.Optional[exp.Expression]: 693 index = self._index 694 if self._match_text_seq("CLUSTER"): 695 this = self._parse_id_var() 696 if this: 697 return self.expression(exp.OnCluster, this=this) 698 else: 699 self._retreat(index) 700 return None 701 702 def _parse_index_constraint( 703 self, kind: t.Optional[str] = None 704 ) -> exp.IndexColumnConstraint: 705 # INDEX name1 expr TYPE type1(args) GRANULARITY value 706 this = self._parse_id_var() 707 expression = self._parse_assignment() 708 709 index_type = self._match_text_seq("TYPE") and ( 710 self._parse_function() or self._parse_var() 711 ) 712 713 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 714 715 return self.expression( 716 exp.IndexColumnConstraint, 717 this=this, 718 expression=expression, 719 index_type=index_type, 720 granularity=granularity, 721 ) 722 723 def _parse_partition(self) -> t.Optional[exp.Partition]: 724 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 725 if not self._match(TokenType.PARTITION): 726 return None 727 728 if self._match_text_seq("ID"): 729 # Corresponds to the PARTITION ID <string_value> syntax 730 expressions: t.List[exp.Expression] = [ 731 self.expression(exp.PartitionId, this=self._parse_string()) 732 ] 733 else: 734 expressions = self._parse_expressions() 735 736 return self.expression(exp.Partition, expressions=expressions) 737 738 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 739 partition = self._parse_partition() 740 741 if not partition or not self._match(TokenType.FROM): 742 return None 743 744 return self.expression( 745 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 746 ) 747 748 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 749 if not self._match_text_seq("PROJECTION"): 750 return None 751 752 return self.expression( 753 exp.ProjectionDef, 754 this=self._parse_id_var(), 755 expression=self._parse_wrapped(self._parse_statement), 756 ) 757 758 def _parse_constraint(self) -> t.Optional[exp.Expression]: 759 return super()._parse_constraint() or self._parse_projection_def() 760 761 def _parse_alias( 762 self, this: t.Optional[exp.Expression], explicit: bool = False 763 ) -> t.Optional[exp.Expression]: 764 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 765 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 766 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 767 return this 768 769 return super()._parse_alias(this=this, explicit=explicit) 770 771 def _parse_expression(self) -> t.Optional[exp.Expression]: 772 this = super()._parse_expression() 773 774 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 775 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 776 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 777 self._match(TokenType.R_PAREN) 778 779 return this 780 781 def _parse_columns(self) -> exp.Expression: 782 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 783 784 while self._next and self._match_text_seq(")", "APPLY", "("): 785 self._match(TokenType.R_PAREN) 786 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 787 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
789 class Generator(generator.Generator): 790 QUERY_HINTS = False 791 STRUCT_DELIMITER = ("(", ")") 792 NVL2_SUPPORTED = False 793 TABLESAMPLE_REQUIRES_PARENS = False 794 TABLESAMPLE_SIZE_IS_ROWS = False 795 TABLESAMPLE_KEYWORDS = "SAMPLE" 796 LAST_DAY_SUPPORTS_DATE_PART = False 797 CAN_IMPLEMENT_ARRAY_ANY = True 798 SUPPORTS_TO_NUMBER = False 799 JOIN_HINTS = False 800 TABLE_HINTS = False 801 GROUPINGS_SEP = "" 802 SET_OP_MODIFIERS = False 803 SUPPORTS_TABLE_ALIAS_COLUMNS = False 804 VALUES_AS_TABLE = False 805 806 STRING_TYPE_MAPPING = { 807 exp.DataType.Type.CHAR: "String", 808 exp.DataType.Type.LONGBLOB: "String", 809 exp.DataType.Type.LONGTEXT: "String", 810 exp.DataType.Type.MEDIUMBLOB: "String", 811 exp.DataType.Type.MEDIUMTEXT: "String", 812 exp.DataType.Type.TINYBLOB: "String", 813 exp.DataType.Type.TINYTEXT: "String", 814 exp.DataType.Type.TEXT: "String", 815 exp.DataType.Type.VARBINARY: "String", 816 exp.DataType.Type.VARCHAR: "String", 817 } 818 819 SUPPORTED_JSON_PATH_PARTS = { 820 exp.JSONPathKey, 821 exp.JSONPathRoot, 822 exp.JSONPathSubscript, 823 } 824 825 TYPE_MAPPING = { 826 **generator.Generator.TYPE_MAPPING, 827 **STRING_TYPE_MAPPING, 828 exp.DataType.Type.ARRAY: "Array", 829 exp.DataType.Type.BIGINT: "Int64", 830 exp.DataType.Type.DATE32: "Date32", 831 exp.DataType.Type.DATETIME: "DateTime", 832 exp.DataType.Type.DATETIME64: "DateTime64", 833 exp.DataType.Type.TIMESTAMP: "DateTime", 834 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 835 exp.DataType.Type.DOUBLE: "Float64", 836 exp.DataType.Type.ENUM: "Enum", 837 exp.DataType.Type.ENUM8: "Enum8", 838 exp.DataType.Type.ENUM16: "Enum16", 839 exp.DataType.Type.FIXEDSTRING: "FixedString", 840 exp.DataType.Type.FLOAT: "Float32", 841 exp.DataType.Type.INT: "Int32", 842 exp.DataType.Type.MEDIUMINT: "Int32", 843 exp.DataType.Type.INT128: "Int128", 844 exp.DataType.Type.INT256: "Int256", 845 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 846 exp.DataType.Type.MAP: "Map", 847 exp.DataType.Type.NESTED: "Nested", 848 exp.DataType.Type.SMALLINT: "Int16", 849 exp.DataType.Type.STRUCT: "Tuple", 850 exp.DataType.Type.TINYINT: "Int8", 851 exp.DataType.Type.UBIGINT: "UInt64", 852 exp.DataType.Type.UINT: "UInt32", 853 exp.DataType.Type.UINT128: "UInt128", 854 exp.DataType.Type.UINT256: "UInt256", 855 exp.DataType.Type.USMALLINT: "UInt16", 856 exp.DataType.Type.UTINYINT: "UInt8", 857 exp.DataType.Type.IPV4: "IPv4", 858 exp.DataType.Type.IPV6: "IPv6", 859 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 860 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 861 } 862 863 TRANSFORMS = { 864 **generator.Generator.TRANSFORMS, 865 exp.AnyValue: rename_func("any"), 866 exp.ApproxDistinct: rename_func("uniq"), 867 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 868 exp.ArraySize: rename_func("LENGTH"), 869 exp.ArraySum: rename_func("arraySum"), 870 exp.ArgMax: arg_max_or_min_no_count("argMax"), 871 exp.ArgMin: arg_max_or_min_no_count("argMin"), 872 exp.Array: inline_array_sql, 873 exp.CastToStrType: rename_func("CAST"), 874 exp.CountIf: rename_func("countIf"), 875 exp.CompressColumnConstraint: lambda self, 876 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 877 exp.ComputedColumnConstraint: lambda self, 878 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 879 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 880 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 881 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 882 exp.DateStrToDate: rename_func("toDate"), 883 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 884 exp.Explode: rename_func("arrayJoin"), 885 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 886 exp.IsNan: rename_func("isNaN"), 887 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 888 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 889 exp.JSONPathKey: json_path_key_only_name, 890 exp.JSONPathRoot: lambda *_: "", 891 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 892 exp.Nullif: rename_func("nullIf"), 893 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 894 exp.Pivot: no_pivot_sql, 895 exp.Quantile: _quantile_sql, 896 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 897 exp.Rand: rename_func("randCanonical"), 898 exp.StartsWith: rename_func("startsWith"), 899 exp.StrPosition: lambda self, e: self.func( 900 "position", e.this, e.args.get("substr"), e.args.get("position") 901 ), 902 exp.TimeToStr: lambda self, e: self.func( 903 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 904 ), 905 exp.TimeStrToTime: _timestrtotime_sql, 906 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 907 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 908 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 909 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 910 exp.MD5Digest: rename_func("MD5"), 911 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 912 exp.SHA: rename_func("SHA1"), 913 exp.SHA2: sha256_sql, 914 exp.UnixToTime: _unix_to_time_sql, 915 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 916 exp.Trim: trim_sql, 917 exp.Variance: rename_func("varSamp"), 918 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 919 exp.Stddev: rename_func("stddevSamp"), 920 exp.Chr: rename_func("CHAR"), 921 exp.Lag: lambda self, e: self.func( 922 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 923 ), 924 exp.Lead: lambda self, e: self.func( 925 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 926 ), 927 } 928 929 PROPERTIES_LOCATION = { 930 **generator.Generator.PROPERTIES_LOCATION, 931 exp.OnCluster: exp.Properties.Location.POST_NAME, 932 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 933 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 934 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 935 } 936 937 # There's no list in docs, but it can be found in Clickhouse code 938 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 939 ON_CLUSTER_TARGETS = { 940 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 941 "DATABASE", 942 "TABLE", 943 "VIEW", 944 "DICTIONARY", 945 "INDEX", 946 "FUNCTION", 947 "NAMED COLLECTION", 948 } 949 950 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 951 NON_NULLABLE_TYPES = { 952 exp.DataType.Type.ARRAY, 953 exp.DataType.Type.MAP, 954 exp.DataType.Type.STRUCT, 955 } 956 957 def strtodate_sql(self, expression: exp.StrToDate) -> str: 958 strtodate_sql = self.function_fallback_sql(expression) 959 960 if not isinstance(expression.parent, exp.Cast): 961 # StrToDate returns DATEs in other dialects (eg. postgres), so 962 # this branch aims to improve the transpilation to clickhouse 963 return f"CAST({strtodate_sql} AS DATE)" 964 965 return strtodate_sql 966 967 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 968 this = expression.this 969 970 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 971 return self.sql(this) 972 973 return super().cast_sql(expression, safe_prefix=safe_prefix) 974 975 def trycast_sql(self, expression: exp.TryCast) -> str: 976 dtype = expression.to 977 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 978 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 979 dtype.set("nullable", True) 980 981 return super().cast_sql(expression) 982 983 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 984 this = self.json_path_part(expression.this) 985 return str(int(this) + 1) if is_int(this) else this 986 987 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 988 return f"AS {self.sql(expression, 'this')}" 989 990 def _any_to_has( 991 self, 992 expression: exp.EQ | exp.NEQ, 993 default: t.Callable[[t.Any], str], 994 prefix: str = "", 995 ) -> str: 996 if isinstance(expression.left, exp.Any): 997 arr = expression.left 998 this = expression.right 999 elif isinstance(expression.right, exp.Any): 1000 arr = expression.right 1001 this = expression.left 1002 else: 1003 return default(expression) 1004 1005 return prefix + self.func("has", arr.this.unnest(), this) 1006 1007 def eq_sql(self, expression: exp.EQ) -> str: 1008 return self._any_to_has(expression, super().eq_sql) 1009 1010 def neq_sql(self, expression: exp.NEQ) -> str: 1011 return self._any_to_has(expression, super().neq_sql, "NOT ") 1012 1013 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1014 # Manually add a flag to make the search case-insensitive 1015 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1016 return self.func("match", expression.this, regex) 1017 1018 def datatype_sql(self, expression: exp.DataType) -> str: 1019 # String is the standard ClickHouse type, every other variant is just an alias. 1020 # Additionally, any supplied length parameter will be ignored. 1021 # 1022 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1023 if expression.this in self.STRING_TYPE_MAPPING: 1024 dtype = "String" 1025 else: 1026 dtype = super().datatype_sql(expression) 1027 1028 # This section changes the type to `Nullable(...)` if the following conditions hold: 1029 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1030 # and change their semantics 1031 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1032 # constraint: "Type of Map key must be a type, that can be represented by integer or 1033 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1034 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1035 parent = expression.parent 1036 nullable = expression.args.get("nullable") 1037 if nullable is True or ( 1038 nullable is None 1039 and not ( 1040 isinstance(parent, exp.DataType) 1041 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1042 and expression.index in (None, 0) 1043 ) 1044 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1045 ): 1046 dtype = f"Nullable({dtype})" 1047 1048 return dtype 1049 1050 def cte_sql(self, expression: exp.CTE) -> str: 1051 if expression.args.get("scalar"): 1052 this = self.sql(expression, "this") 1053 alias = self.sql(expression, "alias") 1054 return f"{this} AS {alias}" 1055 1056 return super().cte_sql(expression) 1057 1058 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1059 return super().after_limit_modifiers(expression) + [ 1060 ( 1061 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1062 if expression.args.get("settings") 1063 else "" 1064 ), 1065 ( 1066 self.seg("FORMAT ") + self.sql(expression, "format") 1067 if expression.args.get("format") 1068 else "" 1069 ), 1070 ] 1071 1072 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1073 params = self.expressions(expression, key="params", flat=True) 1074 return self.func(expression.name, *expression.expressions) + f"({params})" 1075 1076 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1077 return self.func(expression.name, *expression.expressions) 1078 1079 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1080 return self.anonymousaggfunc_sql(expression) 1081 1082 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1083 return self.parameterizedagg_sql(expression) 1084 1085 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1086 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1087 1088 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1089 return f"ON CLUSTER {self.sql(expression, 'this')}" 1090 1091 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1092 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1093 exp.Properties.Location.POST_NAME 1094 ): 1095 this_name = self.sql( 1096 expression.this if isinstance(expression.this, exp.Schema) else expression, 1097 "this", 1098 ) 1099 this_properties = " ".join( 1100 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1101 ) 1102 this_schema = self.schema_columns_sql(expression.this) 1103 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1104 1105 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1106 1107 return super().createable_sql(expression, locations) 1108 1109 def create_sql(self, expression: exp.Create) -> str: 1110 # The comment property comes last in CTAS statements, i.e. after the query 1111 query = expression.expression 1112 if isinstance(query, exp.Query): 1113 comment_prop = expression.find(exp.SchemaCommentProperty) 1114 if comment_prop: 1115 comment_prop.pop() 1116 query.replace(exp.paren(query)) 1117 else: 1118 comment_prop = None 1119 1120 create_sql = super().create_sql(expression) 1121 1122 comment_sql = self.sql(comment_prop) 1123 comment_sql = f" {comment_sql}" if comment_sql else "" 1124 1125 return f"{create_sql}{comment_sql}" 1126 1127 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1128 this = self.indent(self.sql(expression, "this")) 1129 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1130 1131 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1132 this = self.sql(expression, "this") 1133 this = f" {this}" if this else "" 1134 expr = self.sql(expression, "expression") 1135 expr = f" {expr}" if expr else "" 1136 index_type = self.sql(expression, "index_type") 1137 index_type = f" TYPE {index_type}" if index_type else "" 1138 granularity = self.sql(expression, "granularity") 1139 granularity = f" GRANULARITY {granularity}" if granularity else "" 1140 1141 return f"INDEX{this}{expr}{index_type}{granularity}" 1142 1143 def partition_sql(self, expression: exp.Partition) -> str: 1144 return f"PARTITION {self.expressions(expression, flat=True)}" 1145 1146 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1147 return f"ID {self.sql(expression.this)}" 1148 1149 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1150 return ( 1151 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1152 ) 1153 1154 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1155 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
957 def strtodate_sql(self, expression: exp.StrToDate) -> str: 958 strtodate_sql = self.function_fallback_sql(expression) 959 960 if not isinstance(expression.parent, exp.Cast): 961 # StrToDate returns DATEs in other dialects (eg. postgres), so 962 # this branch aims to improve the transpilation to clickhouse 963 return f"CAST({strtodate_sql} AS DATE)" 964 965 return strtodate_sql
967 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 968 this = expression.this 969 970 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 971 return self.sql(this) 972 973 return super().cast_sql(expression, safe_prefix=safe_prefix)
975 def trycast_sql(self, expression: exp.TryCast) -> str: 976 dtype = expression.to 977 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 978 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 979 dtype.set("nullable", True) 980 981 return super().cast_sql(expression)
1018 def datatype_sql(self, expression: exp.DataType) -> str: 1019 # String is the standard ClickHouse type, every other variant is just an alias. 1020 # Additionally, any supplied length parameter will be ignored. 1021 # 1022 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1023 if expression.this in self.STRING_TYPE_MAPPING: 1024 dtype = "String" 1025 else: 1026 dtype = super().datatype_sql(expression) 1027 1028 # This section changes the type to `Nullable(...)` if the following conditions hold: 1029 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1030 # and change their semantics 1031 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1032 # constraint: "Type of Map key must be a type, that can be represented by integer or 1033 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1034 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1035 parent = expression.parent 1036 nullable = expression.args.get("nullable") 1037 if nullable is True or ( 1038 nullable is None 1039 and not ( 1040 isinstance(parent, exp.DataType) 1041 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1042 and expression.index in (None, 0) 1043 ) 1044 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1045 ): 1046 dtype = f"Nullable({dtype})" 1047 1048 return dtype
1058 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1059 return super().after_limit_modifiers(expression) + [ 1060 ( 1061 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1062 if expression.args.get("settings") 1063 else "" 1064 ), 1065 ( 1066 self.seg("FORMAT ") + self.sql(expression, "format") 1067 if expression.args.get("format") 1068 else "" 1069 ), 1070 ]
1091 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1092 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1093 exp.Properties.Location.POST_NAME 1094 ): 1095 this_name = self.sql( 1096 expression.this if isinstance(expression.this, exp.Schema) else expression, 1097 "this", 1098 ) 1099 this_properties = " ".join( 1100 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1101 ) 1102 this_schema = self.schema_columns_sql(expression.this) 1103 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1104 1105 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1106 1107 return super().createable_sql(expression, locations)
1109 def create_sql(self, expression: exp.Create) -> str: 1110 # The comment property comes last in CTAS statements, i.e. after the query 1111 query = expression.expression 1112 if isinstance(query, exp.Query): 1113 comment_prop = expression.find(exp.SchemaCommentProperty) 1114 if comment_prop: 1115 comment_prop.pop() 1116 query.replace(exp.paren(query)) 1117 else: 1118 comment_prop = None 1119 1120 create_sql = super().create_sql(expression) 1121 1122 comment_sql = self.sql(comment_prop) 1123 comment_sql = f" {comment_sql}" if comment_sql else "" 1124 1125 return f"{create_sql}{comment_sql}"
1131 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1132 this = self.sql(expression, "this") 1133 this = f" {this}" if this else "" 1134 expr = self.sql(expression, "expression") 1135 expr = f" {expr}" if expr else "" 1136 index_type = self.sql(expression, "index_type") 1137 index_type = f" TYPE {index_type}" if index_type else "" 1138 granularity = self.sql(expression, "granularity") 1139 granularity = f" GRANULARITY {granularity}" if granularity else "" 1140 1141 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql