Blame - src/tokenize.c - chromium.googlesource.com/chromium/deps/sqlite

blob: be97113215aeb1d9fb18cfc522c2fae73d5b5bef [file] [log] [blame]

drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	1	/*
drh	b19a2bc	2001-09-16 00:13:26 +0000	[diff] [blame^]	2	** 2001 September 15
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	3	**
drh	b19a2bc	2001-09-16 00:13:26 +0000	[diff] [blame^]	4	** The author disclaims copyright to this source code. In place of
				5	** a legal notice, here is a blessing:
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	6	**
drh	b19a2bc	2001-09-16 00:13:26 +0000	[diff] [blame^]	7	** May you do good and not evil.
				8	** May you find forgiveness for yourself and forgive others.
				9	** May you share freely, never taking more than you give.
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	10	**
				11	*************************************************************************
				12	** A tokenizer for SQL
				13	**
				14	** This file contains C code that splits an SQL input string up into
				15	** individual tokens and sends those tokens one-by-one over to the
				16	** parser for analysis.
				17	**
drh	b19a2bc	2001-09-16 00:13:26 +0000	[diff] [blame^]	18	** $Id: tokenize.c,v 1.22 2001/09/16 00:13:27 drh Exp $
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	19	*/
				20	#include "sqliteInt.h"
				21	#include <ctype.h>
drh	dcc581c	2000-05-30 13:44:19 +0000	[diff] [blame]	22	#include <stdlib.h>
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	23
				/*
** Every keyword of the SQL language is described by one instance of
** the following structure.  Instances whose names hash to the same
** value are chained together through their pNext fields.
*/
typedef struct Keyword Keyword;
struct Keyword {
  char *zName;             /* The keyword name */
  int len;                 /* Number of characters in the keyword */
  int tokenType;           /* The token value for this keyword */
  Keyword *pNext;          /* Next keyword with the same hash */
};
				35
				36	/*
				37	** These are the keywords
				38	*/
				39	static Keyword aKeywordTable[] = {
drh	fef5208	2000-06-06 01:50:43 +0000	[diff] [blame]	40	{ "ALL", 0, TK_ALL, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	41	{ "AND", 0, TK_AND, 0 },
				42	{ "AS", 0, TK_AS, 0 },
				43	{ "ASC", 0, TK_ASC, 0 },
drh	c4a3c77	2001-04-04 11:48:57 +0000	[diff] [blame]	44	{ "BEGIN", 0, TK_BEGIN, 0 },
drh	fef5208	2000-06-06 01:50:43 +0000	[diff] [blame]	45	{ "BETWEEN", 0, TK_BETWEEN, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	46	{ "BY", 0, TK_BY, 0 },
				47	{ "CHECK", 0, TK_CHECK, 0 },
drh	f57b14a	2001-09-14 18:54:08 +0000	[diff] [blame]	48	{ "CLUSTER", 0, TK_CLUSTER, 0 },
drh	c4a3c77	2001-04-04 11:48:57 +0000	[diff] [blame]	49	{ "COMMIT", 0, TK_COMMIT, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	50	{ "CONSTRAINT", 0, TK_CONSTRAINT, 0 },
drh	982cef7	2000-05-30 16:27:03 +0000	[diff] [blame]	51	{ "COPY", 0, TK_COPY, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	52	{ "CREATE", 0, TK_CREATE, 0 },
				53	{ "DEFAULT", 0, TK_DEFAULT, 0 },
				54	{ "DELETE", 0, TK_DELETE, 0 },
drh	982cef7	2000-05-30 16:27:03 +0000	[diff] [blame]	55	{ "DELIMITERS", 0, TK_DELIMITERS, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	56	{ "DESC", 0, TK_DESC, 0 },
drh	efb7251	2000-05-31 20:00:52 +0000	[diff] [blame]	57	{ "DISTINCT", 0, TK_DISTINCT, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	58	{ "DROP", 0, TK_DROP, 0 },
drh	c4a3c77	2001-04-04 11:48:57 +0000	[diff] [blame]	59	{ "END", 0, TK_END, 0 },
drh	82c3d63	2000-06-06 21:56:07 +0000	[diff] [blame]	60	{ "EXCEPT", 0, TK_EXCEPT, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	61	{ "EXPLAIN", 0, TK_EXPLAIN, 0 },
				62	{ "FROM", 0, TK_FROM, 0 },
drh	dce2cbe	2000-05-31 02:27:49 +0000	[diff] [blame]	63	{ "GLOB", 0, TK_GLOB, 0 },
drh	2282792	2000-06-06 17:27:05 +0000	[diff] [blame]	64	{ "GROUP", 0, TK_GROUP, 0 },
				65	{ "HAVING", 0, TK_HAVING, 0 },
drh	fef5208	2000-06-06 01:50:43 +0000	[diff] [blame]	66	{ "IN", 0, TK_IN, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	67	{ "INDEX", 0, TK_INDEX, 0 },
				68	{ "INSERT", 0, TK_INSERT, 0 },
drh	82c3d63	2000-06-06 21:56:07 +0000	[diff] [blame]	69	{ "INTERSECT", 0, TK_INTERSECT, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	70	{ "INTO", 0, TK_INTO, 0 },
				71	{ "IS", 0, TK_IS, 0 },
				72	{ "ISNULL", 0, TK_ISNULL, 0 },
				73	{ "KEY", 0, TK_KEY, 0 },
drh	dce2cbe	2000-05-31 02:27:49 +0000	[diff] [blame]	74	{ "LIKE", 0, TK_LIKE, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	75	{ "NOT", 0, TK_NOT, 0 },
				76	{ "NOTNULL", 0, TK_NOTNULL, 0 },
				77	{ "NULL", 0, TK_NULL, 0 },
				78	{ "ON", 0, TK_ON, 0 },
				79	{ "OR", 0, TK_OR, 0 },
				80	{ "ORDER", 0, TK_ORDER, 0 },
drh	f57b14a	2001-09-14 18:54:08 +0000	[diff] [blame]	81	{ "PRAGMA", 0, TK_PRAGMA, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	82	{ "PRIMARY", 0, TK_PRIMARY, 0 },
drh	c4a3c77	2001-04-04 11:48:57 +0000	[diff] [blame]	83	{ "ROLLBACK", 0, TK_ROLLBACK, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	84	{ "SELECT", 0, TK_SELECT, 0 },
				85	{ "SET", 0, TK_SET, 0 },
				86	{ "TABLE", 0, TK_TABLE, 0 },
drh	c4a3c77	2001-04-04 11:48:57 +0000	[diff] [blame]	87	{ "TRANSACTION", 0, TK_TRANSACTION, 0 },
drh	82c3d63	2000-06-06 21:56:07 +0000	[diff] [blame]	88	{ "UNION", 0, TK_UNION, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	89	{ "UNIQUE", 0, TK_UNIQUE, 0 },
				90	{ "UPDATE", 0, TK_UPDATE, 0 },
drh	982cef7	2000-05-30 16:27:03 +0000	[diff] [blame]	91	{ "USING", 0, TK_USING, 0 },
drh	dce2cbe	2000-05-31 02:27:49 +0000	[diff] [blame]	92	{ "VACUUM", 0, TK_VACUUM, 0 },
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	93	{ "VALUES", 0, TK_VALUES, 0 },
				94	{ "WHERE", 0, TK_WHERE, 0 },
				95	};
				96
				97	/*
				98	** This is the hash table
				99	*/
drh	daffd0e	2001-04-11 14:28:42 +0000	[diff] [blame]	100	#define KEY_HASH_SIZE 71
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	101	static Keyword *apHashTable[KEY_HASH_SIZE];
				102
				103
				104	/*
				105	** This function looks up an identifier to determine if it is a
				106	** keyword. If it is a keyword, the token code of that keyword is
				107	** returned. If the input is not a keyword, TK_ID is returned.
				108	*/
				109	static int sqliteKeywordCode(const char *z, int n){
				110	int h;
				111	Keyword *p;
				112	if( aKeywordTable[0].len==0 ){
				113	/* Initialize the keyword hash table */
				114	int i;
				115	int n;
				116	n = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]);
				117	for(i=0; i<n; i++){
				118	aKeywordTable[i].len = strlen(aKeywordTable[i].zName);
				119	h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len);
				120	h %= KEY_HASH_SIZE;
				121	aKeywordTable[i].pNext = apHashTable[h];
				122	apHashTable[h] = &aKeywordTable[i];
				123	}
				124	}
				125	h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE;
				126	for(p=apHashTable[h]; p; p=p->pNext){
				127	if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){
				128	return p->tokenType;
				129	}
				130	}
				131	return TK_ID;
				132	}
				133
				134	/*
				135	** Return the length of the token that begins at z[0]. Return
				136	** -1 if the token is (or might be) incomplete. Store the token
				137	** type in *tokenType before returning.
				138	*/
				139	int sqliteGetToken(const char z, int tokenType){
				140	int i;
				141	switch( *z ){
drh	30cab80	2000-08-09 17:17:25 +0000	[diff] [blame]	142	case ' ': case '\t': case '\n': case '\f': case '\r': {
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	143	for(i=1; z[i] && isspace(z[i]); i++){}
				144	*tokenType = TK_SPACE;
				145	return i;
				146	}
				147	case '-': {
				148	if( z[1]==0 ) return -1;
				149	if( z[1]=='-' ){
				150	for(i=2; z[i] && z[i]!='\n'; i++){}
				151	*tokenType = TK_COMMENT;
				152	return i;
				153	}
				154	*tokenType = TK_MINUS;
				155	return 1;
				156	}
				157	case '(': {
				158	*tokenType = TK_LP;
				159	return 1;
				160	}
				161	case ')': {
				162	*tokenType = TK_RP;
				163	return 1;
				164	}
				165	case ';': {
				166	*tokenType = TK_SEMI;
				167	return 1;
				168	}
				169	case '+': {
				170	*tokenType = TK_PLUS;
				171	return 1;
				172	}
				173	case '*': {
				174	*tokenType = TK_STAR;
				175	return 1;
				176	}
				177	case '/': {
				178	*tokenType = TK_SLASH;
				179	return 1;
				180	}
				181	case '=': {
				182	*tokenType = TK_EQ;
				183	return 1 + (z[1]=='=');
				184	}
				185	case '<': {
				186	if( z[1]=='=' ){
				187	*tokenType = TK_LE;
				188	return 2;
				189	}else if( z[1]=='>' ){
				190	*tokenType = TK_NE;
				191	return 2;
				192	}else{
				193	*tokenType = TK_LT;
				194	return 1;
				195	}
				196	}
				197	case '>': {
				198	if( z[1]=='=' ){
				199	*tokenType = TK_GE;
				200	return 2;
				201	}else{
				202	*tokenType = TK_GT;
				203	return 1;
				204	}
				205	}
				206	case '!': {
				207	if( z[1]!='=' ){
				208	*tokenType = TK_ILLEGAL;
drh	c837e70	2000-06-08 16:26:24 +0000	[diff] [blame]	209	return 2;
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	210	}else{
				211	*tokenType = TK_NE;
				212	return 2;
				213	}
				214	}
drh	0040077	2000-06-16 20:51:26 +0000	[diff] [blame]	215	case '\|': {
				216	if( z[1]!='\|' ){
				217	*tokenType = TK_ILLEGAL;
				218	return 1;
				219	}else{
				220	*tokenType = TK_CONCAT;
				221	return 2;
				222	}
				223	}
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	224	case ',': {
				225	*tokenType = TK_COMMA;
				226	return 1;
				227	}
				228	case '\'': case '"': {
				229	int delim = z[0];
				230	for(i=1; z[i]; i++){
				231	if( z[i]==delim ){
				232	if( z[i+1]==delim ){
				233	i++;
				234	}else{
				235	break;
				236	}
				237	}
				238	}
				239	if( z[i] ) i++;
				240	*tokenType = TK_STRING;
				241	return i;
				242	}
				243	case '.': {
				244	if( !isdigit(z[1]) ){
				245	*tokenType = TK_DOT;
				246	return 1;
				247	}
				248	/* Fall thru into the next case */
				249	}
				250	case '0': case '1': case '2': case '3': case '4':
				251	case '5': case '6': case '7': case '8': case '9': {
drh	c837e70	2000-06-08 16:26:24 +0000	[diff] [blame]	252	*tokenType = TK_INTEGER;
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	253	for(i=1; z[i] && isdigit(z[i]); i++){}
				254	if( z[i]=='.' ){
				255	i++;
				256	while( z[i] && isdigit(z[i]) ){ i++; }
drh	c837e70	2000-06-08 16:26:24 +0000	[diff] [blame]	257	*tokenType = TK_FLOAT;
				258	}
				259	if( (z[i]=='e' \|\| z[i]=='E') &&
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	260	( isdigit(z[i+1])
				261	\|\| ((z[i+1]=='+' \|\| z[i+1]=='-') && isdigit(z[i+2]))
				262	)
drh	c837e70	2000-06-08 16:26:24 +0000	[diff] [blame]	263	){
				264	i += 2;
				265	while( z[i] && isdigit(z[i]) ){ i++; }
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	266	*tokenType = TK_FLOAT;
				267	}else if( z[0]=='.' ){
				268	*tokenType = TK_FLOAT;
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	269	}
				270	return i;
				271	}
				272	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
				273	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
				274	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
				275	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
				276	case 'y': case 'z': case '_':
				277	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
				278	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
				279	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
				280	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
				281	case 'Y': case 'Z': {
				282	for(i=1; z[i] && (isalnum(z[i]) \|\| z[i]=='_'); i++){}
				283	*tokenType = sqliteKeywordCode(z, i);
				284	return i;
				285	}
				286	default: {
				287	break;
				288	}
				289	}
				290	*tokenType = TK_ILLEGAL;
				291	return 1;
				292	}
				293
				294	/*
				295	** Run the parser on the given SQL string. The parser structure is
drh	b19a2bc	2001-09-16 00:13:26 +0000	[diff] [blame^]	296	** passed in. An SQLITE_ status code is returned. If an error occurs
				297	** and pzErrMsg!=NULL then an error message might be written into
				298	** memory obtained from malloc() and *pzErrMsg made to point to that
				299	** error message. Or maybe not.
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	300	*/
				301	int sqliteRunParser(Parse pParse, char zSql, char **pzErrMsg){
				302	int nErr = 0;
				303	int i;
				304	void *pEngine;
				305	int once = 1;
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	306	extern void sqliteParserAlloc(void(*)(int));
				307	extern void sqliteParserFree(void, void()(void*));
drh	338ea13	2001-02-11 16:56:24 +0000	[diff] [blame]	308	extern int sqliteParser(void, int, Token, Parse);
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	309
drh	4c50439	2000-10-16 22:06:40 +0000	[diff] [blame]	310	pParse->db->flags &= ~SQLITE_Interrupt;
				311	pParse->rc = SQLITE_OK;
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	312	i = 0;
drh	2282792	2000-06-06 17:27:05 +0000	[diff] [blame]	313	sqliteParseInfoReset(pParse);
drh	982cef7	2000-05-30 16:27:03 +0000	[diff] [blame]	314	pEngine = sqliteParserAlloc((void()(int))malloc);
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	315	if( pEngine==0 ){
				316	sqliteSetString(pzErrMsg, "out of memory", 0);
				317	return 1;
				318	}
drh	daffd0e	2001-04-11 14:28:42 +0000	[diff] [blame]	319	while( sqlite_malloc_failed==0 && nErr==0 && i>=0 && zSql[i]!=0 ){
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	320	int tokenType;
				321
drh	4c50439	2000-10-16 22:06:40 +0000	[diff] [blame]	322	if( (pParse->db->flags & SQLITE_Interrupt)!=0 ){
				323	pParse->rc = SQLITE_INTERRUPT;
				324	sqliteSetString(pzErrMsg, "interrupt", 0);
				325	break;
				326	}
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	327	pParse->sLastToken.z = &zSql[i];
				328	pParse->sLastToken.n = sqliteGetToken(&zSql[i], &tokenType);
				329	i += pParse->sLastToken.n;
				330	if( once ){
				331	pParse->sFirstToken = pParse->sLastToken;
				332	once = 0;
				333	}
				334	switch( tokenType ){
				335	case TK_SPACE:
				336	break;
				337	case TK_COMMENT: {
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	338	break;
				339	}
				340	case TK_ILLEGAL:
drh	c837e70	2000-06-08 16:26:24 +0000	[diff] [blame]	341	sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1,
				342	pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0);
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	343	nErr++;
				344	break;
				345	default:
				346	sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse);
drh	6e142f5	2000-06-08 13:36:40 +0000	[diff] [blame]	347	if( pParse->zErrMsg && pParse->sErrToken.z ){
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	348	sqliteSetNString(pzErrMsg, "near \"", -1,
				349	pParse->sErrToken.z, pParse->sErrToken.n,
				350	"\": ", -1,
				351	pParse->zErrMsg, -1,
				352	0);
				353	nErr++;
drh	6e142f5	2000-06-08 13:36:40 +0000	[diff] [blame]	354	sqliteFree(pParse->zErrMsg);
				355	pParse->zErrMsg = 0;
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	356	}
				357	break;
				358	}
				359	}
drh	4c50439	2000-10-16 22:06:40 +0000	[diff] [blame]	360	if( nErr==0 && (pParse->db->flags & SQLITE_Interrupt)==0 ){
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	361	sqliteParser(pEngine, 0, pParse->sLastToken, pParse);
drh	6e142f5	2000-06-08 13:36:40 +0000	[diff] [blame]	362	if( pParse->zErrMsg && pParse->sErrToken.z ){
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	363	sqliteSetNString(pzErrMsg, "near \"", -1,
				364	pParse->sErrToken.z, pParse->sErrToken.n,
				365	"\": ", -1,
				366	pParse->zErrMsg, -1,
				367	0);
				368	nErr++;
drh	6e142f5	2000-06-08 13:36:40 +0000	[diff] [blame]	369	sqliteFree(pParse->zErrMsg);
				370	pParse->zErrMsg = 0;
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	371	}
				372	}
drh	dcc581c	2000-05-30 13:44:19 +0000	[diff] [blame]	373	sqliteParserFree(pEngine, free);
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	374	if( pParse->zErrMsg ){
				375	if( pzErrMsg ){
drh	6e142f5	2000-06-08 13:36:40 +0000	[diff] [blame]	376	sqliteFree(*pzErrMsg);
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	377	*pzErrMsg = pParse->zErrMsg;
				378	}else{
				379	sqliteFree(pParse->zErrMsg);
				380	}
				381	if( !nErr ) nErr++;
				382	}
				383	if( pParse->pVdbe ){
				384	sqliteVdbeDelete(pParse->pVdbe);
				385	pParse->pVdbe = 0;
				386	}
				387	if( pParse->pNewTable ){
				388	sqliteDeleteTable(pParse->db, pParse->pNewTable);
				389	pParse->pNewTable = 0;
				390	}
drh	2282792	2000-06-06 17:27:05 +0000	[diff] [blame]	391	sqliteParseInfoReset(pParse);
drh	4c50439	2000-10-16 22:06:40 +0000	[diff] [blame]	392	if( nErr>0 && pParse->rc==SQLITE_OK ){
				393	pParse->rc = SQLITE_ERROR;
				394	}
drh	7589723	2000-05-29 14:26:00 +0000	[diff] [blame]	395	return nErr;
				396	}