From 2c6577d38041c96bfa26825078d38d4cba31a21a Mon Sep 17 00:00:00 2001
From: "mathilde.oustlant"
Date: Mon, 11 Mar 2019 16:29:47 +0100
Subject: Added HQL Keywords

---
Review notes (free-form commentary after the three-dash line; not part of the
commit message or of the diff):
  * The leading `(?![_A-ZÀ-Ü])` added to the Float and Integer patterns is a
    negative lookahead evaluated at the position where `-`, a digit or `.`
    must match next, so it can never reject anything. If the intent was
    "not preceded by an identifier character", that needs a negative
    lookbehind instead -- TODO confirm intent.
  * is_keyword() now consults KEYWORDS_HQL before KEYWORDS, so a word present
    in both tables resolves to its KEYWORDS_HQL token type.
  * The added LATERAL VIEW pattern line is longer than 79 columns.
  * NOTE(review): this text arrived with newlines and indentation collapsed;
    leading whitespace and blank context lines below were restored from the
    hunk headers and Python syntax -- verify against blob 602051a before
    applying.

 sqlparse/keywords.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 97 insertions(+), 2 deletions(-)

(limited to 'sqlparse')

diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index 602051a..761d14e 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -16,6 +16,7 @@ def is_keyword(value):
     return (KEYWORDS_COMMON.get(val)
             or KEYWORDS_ORACLE.get(val)
             or KEYWORDS_PLPGSQL.get(val)
+            or KEYWORDS_HQL.get(val)
             or KEYWORDS.get(val, tokens.Name)), value
 
 
@@ -63,8 +64,8 @@ SQL_REGEX = {
         (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name),  # side effect: change kw to func
         (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal),
         (r'-?\d*(\.\d+)?E-?\d+', tokens.Number.Float),
-        (r'-?(\d+(\.\d*)|\.\d+)', tokens.Number.Float),
-        (r'-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
+        (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', tokens.Number.Float),
+        (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
         (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
         # not a real string literal in ANSI SQL:
         (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol),
@@ -82,6 +83,7 @@ SQL_REGEX = {
         (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
         (r'GROUP\s+BY\b', tokens.Keyword),
         (r'ORDER\s+BY\b', tokens.Keyword),
+        (r'(LATERAL\s+VIEW\s+)(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', tokens.Keyword),
 
         (r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword),
 
@@ -634,14 +636,20 @@ KEYWORDS = {
     'DATE': tokens.Name.Builtin,
     'DEC': tokens.Name.Builtin,
     'DECIMAL': tokens.Name.Builtin,
+    'FILE_TYPE': tokens.Name.Builtin,
     'FLOAT': tokens.Name.Builtin,
     'INT': tokens.Name.Builtin,
     'INT8': tokens.Name.Builtin,
     'INTEGER': tokens.Name.Builtin,
     'INTERVAL': tokens.Name.Builtin,
     'LONG': tokens.Name.Builtin,
+    'NATURALN': tokens.Name.Builtin,
+    'NVARCHAR': tokens.Name.Builtin,
     'NUMBER': tokens.Name.Builtin,
     'NUMERIC': tokens.Name.Builtin,
+    'PLS_INTEGER': tokens.Name.Builtin,
+    'POSITIVE': tokens.Name.Builtin,
+    'POSITIVEN': tokens.Name.Builtin,
     'REAL': tokens.Name.Builtin,
     'ROWID': tokens.Name.Builtin,
     'ROWLABEL': tokens.Name.Builtin,
@@ -649,11 +657,18 @@ KEYWORDS = {
     'SERIAL': tokens.Name.Builtin,
     'SERIAL8': tokens.Name.Builtin,
     'SIGNED': tokens.Name.Builtin,
+    'SIGNTYPE': tokens.Name.Builtin,
+    'SIMPLE_DOUBLE': tokens.Name.Builtin,
+    'SIMPLE_FLOAT': tokens.Name.Builtin,
+    'SIMPLE_INTEGER': tokens.Name.Builtin,
     'SMALLINT': tokens.Name.Builtin,
+    'SYS_REFCURSOR': tokens.Name.Builtin,
     'SYSDATE': tokens.Name,
     'TEXT': tokens.Name.Builtin,
     'TINYINT': tokens.Name.Builtin,
     'UNSIGNED': tokens.Name.Builtin,
+    'UROWID': tokens.Name.Builtin,
+    'UTL_FILE': tokens.Name.Builtin,
     'VARCHAR': tokens.Name.Builtin,
     'VARCHAR2': tokens.Name.Builtin,
     'VARYING': tokens.Name.Builtin,
@@ -854,3 +869,83 @@ KEYWORDS_PLPGSQL = {
     'IN': tokens.Keyword,
     'LOOP': tokens.Keyword,
 }
+
+# Hive Syntax
+KEYWORDS_HQL = {
+    'EXPLODE': tokens.Keyword,
+    'DIRECTORY': tokens.Keyword,
+    'DISTRIBUTE': tokens.Keyword,
+    'INCLUDE': tokens.Keyword,
+    'LOCATE': tokens.Keyword,
+    'OVERWRITE': tokens.Keyword,
+    'POSEXPLODE': tokens.Keyword,
+
+    'ARRAY_CONTAINS': tokens.Keyword,
+    'CMP': tokens.Keyword,
+    'COLLECT_LIST': tokens.Keyword,
+    'CONCAT': tokens.Keyword,
+    'CONDITION': tokens.Keyword,
+    'DATE_ADD': tokens.Keyword,
+    'DATE_SUB': tokens.Keyword,
+    'DECODE': tokens.Keyword,
+    'DBMS_OUTPUT': tokens.Keyword,
+    'ELEMENTS': tokens.Keyword,
+    'EXCHANGE': tokens.Keyword,
+    'EXTENDED': tokens.Keyword,
+    'FLOOR': tokens.Keyword,
+    'FOLLOWING': tokens.Keyword,
+    'FROM_UNIXTIME': tokens.Keyword,
+    'FTP': tokens.Keyword,
+    'HOUR': tokens.Keyword,
+    'INLINE': tokens.Keyword,
+    'INSTR': tokens.Keyword,
+    'LEN': tokens.Keyword,
+    'MAXELEMENT': tokens.Keyword,
+    'MAXINDEX': tokens.Keyword,
+    'MAX_PART_DATE': tokens.Keyword,
+    'MAX_PART_INT': tokens.Keyword,
+    'MAX_PART_STRING': tokens.Keyword,
+    'MINELEMENT': tokens.Keyword,
+    'MININDEX': tokens.Keyword,
+    'MIN_PART_DATE': tokens.Keyword,
+    'MIN_PART_INT': tokens.Keyword,
+    'MIN_PART_STRING': tokens.Keyword,
+    'NOW': tokens.Keyword,
+    'NVL': tokens.Keyword,
+    'NVL2': tokens.Keyword,
+    'PARSE_URL_TUPLE': tokens.Keyword,
+    'PART_LOC': tokens.Keyword,
+    'PART_COUNT': tokens.Keyword,
+    'PART_COUNT_BY': tokens.Keyword,
+    'PRINT': tokens.Keyword,
+    'PUT_LINE': tokens.Keyword,
+    'RANGE': tokens.Keyword,
+    'REDUCE': tokens.Keyword,
+    'REGEXP_REPLACE': tokens.Keyword,
+    'RESIGNAL': tokens.Keyword,
+    'RTRIM': tokens.Keyword,
+    'SIGN': tokens.Keyword,
+    'SIGNAL': tokens.Keyword,
+    'SIN': tokens.Keyword,
+    'SPLIT': tokens.Keyword,
+    'SQRT': tokens.Keyword,
+    'STACK': tokens.Keyword,
+    'STR': tokens.Keyword,
+    'SUBSTR': tokens.Keyword,
+    'SUMMARY': tokens.Keyword,
+    'TBLPROPERTIES': tokens.Keyword,
+    'TIMESTAMP_ISO': tokens.Keyword,
+    'TO_CHAR': tokens.Keyword,
+    'TO_DATE': tokens.Keyword,
+    'TO_TIMESTAMP': tokens.Keyword,
+    'TRUNC': tokens.Keyword,
+    'UNBOUNDED': tokens.Keyword,
+    'UNIQUEJOIN': tokens.Keyword,
+    'UNIX_TIMESTAMP': tokens.Keyword,
+    'UTC_TIMESTAMP': tokens.Keyword,
+    'VIEWS': tokens.Keyword,
+
+    'EXIT': tokens.Keyword,
+    'BREAK': tokens.Keyword,
+    'LEAVE': tokens.Keyword,
+}
-- 
cgit v1.2.1