blob: fc0221d51d52d482b720037ea613a013c1a55f80 [file] [log] [blame]
/*
** Copyright (c) 1999, 2000 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the GNU General Public
** License as published by the Free Software Foundation; either
** version 2 of the License, or (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
** General Public License for more details.
**
** You should have received a copy of the GNU General Public
** License along with this library; if not, write to the
** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
** Boston, MA  02111-1307, USA.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*************************************************************************
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  Also found here are subroutines
** to generate VDBE code to evaluate expressions.
**
** $Id: where.c,v 1.13 2001/04/04 11:48:58 drh Exp $
*/
30#include "sqliteInt.h"
31
32/*
33** The query generator uses an array of instances of this structure to
34** help it analyze the subexpressions of the WHERE clause. Each WHERE
35** clause subexpression is separated from the others by an AND operator.
36*/
37typedef struct ExprInfo ExprInfo;
38struct ExprInfo {
39 Expr *p; /* Pointer to the subexpression */
40 int indexable; /* True if this subexprssion is usable by an index */
drh967e8b72000-06-21 13:59:10 +000041 int idxLeft; /* p->pLeft is a column in this table number. -1 if
42 ** p->pLeft is not the column of any table */
43 int idxRight; /* p->pRight is a column in this table number. -1 if
44 ** p->pRight is not the column of any table */
drh75897232000-05-29 14:26:00 +000045 unsigned prereqLeft; /* Tables referenced by p->pLeft */
46 unsigned prereqRight; /* Tables referenced by p->pRight */
47};
48
/*
** Determine the number of elements in an array.
**
** X must be a genuine array (not a pointer that an array has decayed to).
** The argument is parenthesized before it is subscripted, and the result
** is cast to int so that comparisons against the signed loop counters
** used in this file do not silently convert the counter to an unsigned
** type.
*/
#define ARRAYSIZE(X)  ((int)(sizeof(X)/sizeof((X)[0])))
53
54/*
55** This routine is used to divide the WHERE expression into subexpressions
56** separated by the AND operator.
57**
58** aSlot[] is an array of subexpressions structures.
59** There are nSlot spaces left in this array. This routine attempts to
60** split pExpr into subexpressions and fills aSlot[] with those subexpressions.
61** The return value is the number of slots filled.
62*/
63static int exprSplit(int nSlot, ExprInfo *aSlot, Expr *pExpr){
64 int cnt = 0;
65 if( pExpr==0 || nSlot<1 ) return 0;
66 if( nSlot==1 || pExpr->op!=TK_AND ){
67 aSlot[0].p = pExpr;
68 return 1;
69 }
70 if( pExpr->pLeft->op!=TK_AND ){
71 aSlot[0].p = pExpr->pLeft;
72 cnt = 1 + exprSplit(nSlot-1, &aSlot[1], pExpr->pRight);
73 }else{
74 cnt = exprSplit(nSlot, aSlot, pExpr->pRight);
75 cnt += exprSplit(nSlot-cnt, &aSlot[cnt], pExpr->pLeft);
76 }
77 return cnt;
78}
79
80/*
81** This routine walks (recursively) an expression tree and generates
82** a bitmask indicating which tables are used in that expression
83** tree. Bit 0 of the mask is set if table 0 is used. But 1 is set
84** if table 1 is used. And so forth.
85**
86** In order for this routine to work, the calling function must have
87** previously invoked sqliteExprResolveIds() on the expression. See
88** the header comment on that routine for additional information.
drh19a775c2000-06-05 18:54:46 +000089**
90** "base" is the cursor number (the value of the iTable field) that
91** corresponds to the first entry in the table list. This is the
92** same as pParse->nTab.
drh75897232000-05-29 14:26:00 +000093*/
drh19a775c2000-06-05 18:54:46 +000094static int exprTableUsage(int base, Expr *p){
drh75897232000-05-29 14:26:00 +000095 unsigned int mask = 0;
96 if( p==0 ) return 0;
drh967e8b72000-06-21 13:59:10 +000097 if( p->op==TK_COLUMN ){
drh19a775c2000-06-05 18:54:46 +000098 return 1<< (p->iTable - base);
drh75897232000-05-29 14:26:00 +000099 }
100 if( p->pRight ){
drh19a775c2000-06-05 18:54:46 +0000101 mask = exprTableUsage(base, p->pRight);
drh75897232000-05-29 14:26:00 +0000102 }
103 if( p->pLeft ){
drh19a775c2000-06-05 18:54:46 +0000104 mask |= exprTableUsage(base, p->pLeft);
drh75897232000-05-29 14:26:00 +0000105 }
106 return mask;
107}
108
109/*
110** The input to this routine is an ExprInfo structure with only the
111** "p" field filled in. The job of this routine is to analyze the
112** subexpression and populate all the other fields of the ExprInfo
113** structure.
drh19a775c2000-06-05 18:54:46 +0000114**
115** "base" is the cursor number (the value of the iTable field) that
116** corresponds to the first entyr in the table list. This is the
117** same as pParse->nTab.
drh75897232000-05-29 14:26:00 +0000118*/
drh19a775c2000-06-05 18:54:46 +0000119static void exprAnalyze(int base, ExprInfo *pInfo){
drh75897232000-05-29 14:26:00 +0000120 Expr *pExpr = pInfo->p;
drh19a775c2000-06-05 18:54:46 +0000121 pInfo->prereqLeft = exprTableUsage(base, pExpr->pLeft);
122 pInfo->prereqRight = exprTableUsage(base, pExpr->pRight);
drh75897232000-05-29 14:26:00 +0000123 pInfo->indexable = 0;
124 pInfo->idxLeft = -1;
125 pInfo->idxRight = -1;
126 if( pExpr->op==TK_EQ && (pInfo->prereqRight & pInfo->prereqLeft)==0 ){
drh967e8b72000-06-21 13:59:10 +0000127 if( pExpr->pRight->op==TK_COLUMN ){
drh19a775c2000-06-05 18:54:46 +0000128 pInfo->idxRight = pExpr->pRight->iTable - base;
drh75897232000-05-29 14:26:00 +0000129 pInfo->indexable = 1;
130 }
drh967e8b72000-06-21 13:59:10 +0000131 if( pExpr->pLeft->op==TK_COLUMN ){
drh19a775c2000-06-05 18:54:46 +0000132 pInfo->idxLeft = pExpr->pLeft->iTable - base;
drh75897232000-05-29 14:26:00 +0000133 pInfo->indexable = 1;
134 }
135 }
136}
137
138/*
139** Generating the beginning of the loop used for WHERE clause processing.
140** The return value is a pointer to an (opaque) structure that contains
141** information needed to terminate the loop. Later, the calling routine
142** should invoke sqliteWhereEnd() with the return value of this function
143** in order to complete the WHERE clause processing.
144**
145** If an error occurs, this routine returns NULL.
146*/
147WhereInfo *sqliteWhereBegin(
148 Parse *pParse, /* The parser context */
149 IdList *pTabList, /* A list of all tables */
150 Expr *pWhere, /* The WHERE clause */
151 int pushKey /* If TRUE, leave the table key on the stack */
152){
153 int i; /* Loop counter */
154 WhereInfo *pWInfo; /* Will become the return value of this function */
155 Vdbe *v = pParse->pVdbe; /* The virtual database engine */
156 int brk, cont; /* Addresses used during code generation */
157 int *aOrder; /* Order in which pTabList entries are searched */
158 int nExpr; /* Number of subexpressions in the WHERE clause */
159 int loopMask; /* One bit set for each outer loop */
160 int haveKey; /* True if KEY is on the stack */
drh19a775c2000-06-05 18:54:46 +0000161 int base; /* First available index for OP_Open opcodes */
drh75897232000-05-29 14:26:00 +0000162 Index *aIdx[32]; /* Index to use on each nested loop. */
drhc4a3c772001-04-04 11:48:57 +0000163 int aDirect[32]; /* If TRUE, then index this table using ROWID */
drh75897232000-05-29 14:26:00 +0000164 ExprInfo aExpr[50]; /* The WHERE clause is divided into these expressions */
165
166 /* Allocate space for aOrder[]. */
167 aOrder = sqliteMalloc( sizeof(int) * pTabList->nId );
168
169 /* Allocate and initialize the WhereInfo structure that will become the
170 ** return value.
171 */
172 pWInfo = sqliteMalloc( sizeof(WhereInfo) );
173 if( pWInfo==0 ){
174 sqliteFree(aOrder);
175 return 0;
176 }
177 pWInfo->pParse = pParse;
178 pWInfo->pTabList = pTabList;
drh19a775c2000-06-05 18:54:46 +0000179 base = pWInfo->base = pParse->nTab;
drh75897232000-05-29 14:26:00 +0000180
181 /* Split the WHERE clause into as many as 32 separate subexpressions
182 ** where each subexpression is separated by an AND operator. Any additional
183 ** subexpressions are attached in the aExpr[32] and will not enter
184 ** into the query optimizer computations. 32 is chosen as the cutoff
185 ** since that is the number of bits in an integer that we use for an
186 ** expression-used mask.
187 */
188 memset(aExpr, 0, sizeof(aExpr));
189 nExpr = exprSplit(ARRAYSIZE(aExpr), aExpr, pWhere);
190
191 /* Analyze all of the subexpressions.
192 */
193 for(i=0; i<nExpr; i++){
drh19a775c2000-06-05 18:54:46 +0000194 exprAnalyze(pParse->nTab, &aExpr[i]);
drh75897232000-05-29 14:26:00 +0000195 }
196
197 /* Figure out a good nesting order for the tables. aOrder[0] will
198 ** be the index in pTabList of the outermost table. aOrder[1] will
199 ** be the first nested loop and so on. aOrder[pTabList->nId-1] will
200 ** be the innermost loop.
201 **
drh7e391e12000-05-30 20:17:49 +0000202 ** Someday will put in a good algorithm here to reorder the loops
drh75897232000-05-29 14:26:00 +0000203 ** for an effiecient query. But for now, just use whatever order the
204 ** tables appear in in the pTabList.
205 */
206 for(i=0; i<pTabList->nId; i++){
207 aOrder[i] = i;
208 }
209
210 /* Figure out what index to use (if any) for each nested loop.
211 ** Make aIdx[i] point to the index to use for the i-th nested loop
212 ** where i==0 is the outer loop and i==pTabList->nId-1 is the inner
drhc4a3c772001-04-04 11:48:57 +0000213 ** loop. If the expression uses only the ROWID field, then set
214 ** aDirect[i] to 1.
drh75897232000-05-29 14:26:00 +0000215 **
216 ** Actually, if there are more than 32 tables in the join, only the
217 ** first 32 tables are candidates for indices.
218 */
219 loopMask = 0;
220 for(i=0; i<pTabList->nId && i<ARRAYSIZE(aIdx); i++){
drhc4a3c772001-04-04 11:48:57 +0000221 int j;
drh75897232000-05-29 14:26:00 +0000222 int idx = aOrder[i];
223 Table *pTab = pTabList->a[idx].pTab;
224 Index *pIdx;
225 Index *pBestIdx = 0;
226
drhc4a3c772001-04-04 11:48:57 +0000227 /* Check to see if there is an expression that uses only the
228 ** ROWID field of this table. If so, set aDirect[i] to 1.
229 ** If not, set aDirect[i] to 0.
230 */
231 aDirect[i] = 0;
232 for(j=0; j<nExpr; j++){
233 if( aExpr[j].idxLeft==idx && aExpr[j].p->pLeft->iColumn<0
234 && (aExpr[j].prereqRight & loopMask)==aExpr[j].prereqRight ){
235 aDirect[i] = 1;
236 break;
237 }
238 if( aExpr[j].idxRight==idx && aExpr[j].p->pRight->iColumn<0
239 && (aExpr[j].prereqLeft & loopMask)==aExpr[j].prereqLeft ){
240 aDirect[i] = 1;
241 break;
242 }
243 }
244 if( aDirect[i] ){
245 loopMask |= 1<<idx;
246 aIdx[i] = 0;
247 continue;
248 }
249
drh75897232000-05-29 14:26:00 +0000250 /* Do a search for usable indices. Leave pBestIdx pointing to
drh7e391e12000-05-30 20:17:49 +0000251 ** the most specific usable index.
drh75897232000-05-29 14:26:00 +0000252 **
253 ** "Most specific" means that pBestIdx is the usable index that
drh967e8b72000-06-21 13:59:10 +0000254 ** has the largest value for nColumn. A usable index is one for
255 ** which there are subexpressions to compute every column of the
drh75897232000-05-29 14:26:00 +0000256 ** index.
257 */
258 for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
drh967e8b72000-06-21 13:59:10 +0000259 int columnMask = 0;
drh75897232000-05-29 14:26:00 +0000260
drh967e8b72000-06-21 13:59:10 +0000261 if( pIdx->nColumn>32 ) continue;
drh75897232000-05-29 14:26:00 +0000262 for(j=0; j<nExpr; j++){
263 if( aExpr[j].idxLeft==idx
264 && (aExpr[j].prereqRight & loopMask)==aExpr[j].prereqRight ){
drh967e8b72000-06-21 13:59:10 +0000265 int iColumn = aExpr[j].p->pLeft->iColumn;
drh75897232000-05-29 14:26:00 +0000266 int k;
drh967e8b72000-06-21 13:59:10 +0000267 for(k=0; k<pIdx->nColumn; k++){
268 if( pIdx->aiColumn[k]==iColumn ){
269 columnMask |= 1<<k;
drh75897232000-05-29 14:26:00 +0000270 break;
271 }
272 }
273 }
274 if( aExpr[j].idxRight==idx
275 && (aExpr[j].prereqLeft & loopMask)==aExpr[j].prereqLeft ){
drh967e8b72000-06-21 13:59:10 +0000276 int iColumn = aExpr[j].p->pRight->iColumn;
drh75897232000-05-29 14:26:00 +0000277 int k;
drh967e8b72000-06-21 13:59:10 +0000278 for(k=0; k<pIdx->nColumn; k++){
279 if( pIdx->aiColumn[k]==iColumn ){
280 columnMask |= 1<<k;
drh75897232000-05-29 14:26:00 +0000281 break;
282 }
283 }
284 }
285 }
drh967e8b72000-06-21 13:59:10 +0000286 if( columnMask + 1 == (1<<pIdx->nColumn) ){
287 if( pBestIdx==0 || pBestIdx->nColumn<pIdx->nColumn ){
drh75897232000-05-29 14:26:00 +0000288 pBestIdx = pIdx;
289 }
290 }
291 }
292 aIdx[i] = pBestIdx;
drh7e391e12000-05-30 20:17:49 +0000293 loopMask |= 1<<idx;
drh75897232000-05-29 14:26:00 +0000294 }
295
296 /* Open all tables in the pTabList and all indices in aIdx[].
297 */
298 for(i=0; i<pTabList->nId; i++){
drh345fda32001-01-15 22:51:08 +0000299 sqliteVdbeAddOp(v, OP_OpenTbl, base+i, 0, pTabList->a[i].pTab->zName, 0);
drh75897232000-05-29 14:26:00 +0000300 if( i<ARRAYSIZE(aIdx) && aIdx[i]!=0 ){
drh345fda32001-01-15 22:51:08 +0000301 sqliteVdbeAddOp(v, OP_OpenIdx, base+pTabList->nId+i, 0, aIdx[i]->zName,0);
drh75897232000-05-29 14:26:00 +0000302 }
303 }
drh19a775c2000-06-05 18:54:46 +0000304 memcpy(pWInfo->aIdx, aIdx, sizeof(aIdx));
drh75897232000-05-29 14:26:00 +0000305
306 /* Generate the code to do the search
307 */
308 pWInfo->iBreak = brk = sqliteVdbeMakeLabel(v);
309 loopMask = 0;
310 for(i=0; i<pTabList->nId; i++){
311 int j, k;
312 int idx = aOrder[i];
drhc4a3c772001-04-04 11:48:57 +0000313 int goDirect;
314 Index *pIdx;
drh75897232000-05-29 14:26:00 +0000315
drhc4a3c772001-04-04 11:48:57 +0000316 if( i<ARRAYSIZE(aIdx) ){
317 pIdx = aIdx[i];
318 goDirect = aDirect[i];
319 }else{
320 pIdx = 0;
321 goDirect = 0;
322 }
323
324 if( goDirect ){
325 /* Case 1: We can directly reference a single row using the ROWID field.
326 */
327 cont = brk;
328 for(k=0; k<nExpr; k++){
329 if( aExpr[k].p==0 ) continue;
330 if( aExpr[k].idxLeft==idx
331 && (aExpr[k].prereqRight & loopMask)==aExpr[k].prereqRight
332 && aExpr[k].p->pLeft->iColumn<0
333 ){
334 sqliteExprCode(pParse, aExpr[k].p->pRight);
335 aExpr[k].p = 0;
336 break;
337 }
338 if( aExpr[k].idxRight==idx
339 && (aExpr[k].prereqLeft & loopMask)==aExpr[k].prereqLeft
340 && aExpr[k].p->pRight->iColumn<0
341 ){
342 sqliteExprCode(pParse, aExpr[k].p->pLeft);
343 aExpr[k].p = 0;
344 break;
345 }
346 }
347 sqliteVdbeAddOp(v, OP_AddImm, 0, 0, 0, 0);
348 if( i==pTabList->nId-1 && pushKey ){
349 haveKey = 1;
350 }else{
351 sqliteVdbeAddOp(v, OP_Fetch, base+idx, 0, 0, 0);
352 haveKey = 0;
353 }
354 }else if( pIdx==0 ){
355 /* Case 2: There was no usable index. We must do a complete
drh75897232000-05-29 14:26:00 +0000356 ** scan of the table.
357 */
drhc4a3c772001-04-04 11:48:57 +0000358 cont = sqliteVdbeMakeLabel(v);
drh19a775c2000-06-05 18:54:46 +0000359 sqliteVdbeAddOp(v, OP_Next, base+idx, brk, 0, cont);
drh75897232000-05-29 14:26:00 +0000360 haveKey = 0;
361 }else{
drhc4a3c772001-04-04 11:48:57 +0000362 /* Case 3: We do have a usable index in pIdx.
drh75897232000-05-29 14:26:00 +0000363 */
drhc4a3c772001-04-04 11:48:57 +0000364 cont = sqliteVdbeMakeLabel(v);
drh967e8b72000-06-21 13:59:10 +0000365 for(j=0; j<pIdx->nColumn; j++){
drh75897232000-05-29 14:26:00 +0000366 for(k=0; k<nExpr; k++){
367 if( aExpr[k].p==0 ) continue;
368 if( aExpr[k].idxLeft==idx
369 && (aExpr[k].prereqRight & loopMask)==aExpr[k].prereqRight
drh967e8b72000-06-21 13:59:10 +0000370 && aExpr[k].p->pLeft->iColumn==pIdx->aiColumn[j]
drh75897232000-05-29 14:26:00 +0000371 ){
372 sqliteExprCode(pParse, aExpr[k].p->pRight);
373 aExpr[k].p = 0;
374 break;
375 }
376 if( aExpr[k].idxRight==idx
377 && (aExpr[k].prereqLeft & loopMask)==aExpr[k].prereqLeft
drh967e8b72000-06-21 13:59:10 +0000378 && aExpr[k].p->pRight->iColumn==pIdx->aiColumn[j]
drh75897232000-05-29 14:26:00 +0000379 ){
380 sqliteExprCode(pParse, aExpr[k].p->pLeft);
381 aExpr[k].p = 0;
382 break;
383 }
384 }
385 }
drh967e8b72000-06-21 13:59:10 +0000386 sqliteVdbeAddOp(v, OP_MakeKey, pIdx->nColumn, 0, 0, 0);
drh19a775c2000-06-05 18:54:46 +0000387 sqliteVdbeAddOp(v, OP_Fetch, base+pTabList->nId+i, 0, 0, 0);
388 sqliteVdbeAddOp(v, OP_NextIdx, base+pTabList->nId+i, brk, 0, cont);
drh75897232000-05-29 14:26:00 +0000389 if( i==pTabList->nId-1 && pushKey ){
390 haveKey = 1;
391 }else{
drh63e27412000-08-22 18:29:33 +0000392 sqliteVdbeAddOp(v, OP_Fetch, base+idx, 0, 0, 0);
drh75897232000-05-29 14:26:00 +0000393 haveKey = 0;
394 }
395 }
396 loopMask |= 1<<idx;
397
398 /* Insert code to test every subexpression that can be completely
399 ** computed using the current set of tables.
400 */
401 for(j=0; j<nExpr; j++){
402 if( aExpr[j].p==0 ) continue;
403 if( (aExpr[j].prereqRight & loopMask)!=aExpr[j].prereqRight ) continue;
404 if( (aExpr[j].prereqLeft & loopMask)!=aExpr[j].prereqLeft ) continue;
405 if( haveKey ){
drh573bd272001-02-19 23:23:38 +0000406 haveKey = 0;
drh19a775c2000-06-05 18:54:46 +0000407 sqliteVdbeAddOp(v, OP_Fetch, base+idx, 0, 0, 0);
drh75897232000-05-29 14:26:00 +0000408 }
409 sqliteExprIfFalse(pParse, aExpr[j].p, cont);
410 aExpr[j].p = 0;
411 }
412 brk = cont;
413 }
414 pWInfo->iContinue = cont;
415 if( pushKey && !haveKey ){
drh4794b982000-06-06 13:54:14 +0000416 sqliteVdbeAddOp(v, OP_Key, base, 0, 0, 0);
drh75897232000-05-29 14:26:00 +0000417 }
418 sqliteFree(aOrder);
419 return pWInfo;
420}
421
422/*
423** Generate the end of the WHERE loop.
424*/
425void sqliteWhereEnd(WhereInfo *pWInfo){
426 Vdbe *v = pWInfo->pParse->pVdbe;
drh19a775c2000-06-05 18:54:46 +0000427 int i;
428 int brk = pWInfo->iBreak;
429 int base = pWInfo->base;
430
drh75897232000-05-29 14:26:00 +0000431 sqliteVdbeAddOp(v, OP_Goto, 0, pWInfo->iContinue, 0, 0);
drh19a775c2000-06-05 18:54:46 +0000432 for(i=0; i<pWInfo->pTabList->nId; i++){
433 sqliteVdbeAddOp(v, OP_Close, base+i, 0, 0, brk);
434 brk = 0;
435 if( i<ARRAYSIZE(pWInfo->aIdx) && pWInfo->aIdx[i]!=0 ){
436 sqliteVdbeAddOp(v, OP_Close, base+pWInfo->pTabList->nId+i, 0, 0, 0);
437 }
438 }
439 if( brk!=0 ){
440 sqliteVdbeAddOp(v, OP_Noop, 0, 0, 0, brk);
441 }
drh75897232000-05-29 14:26:00 +0000442 sqliteFree(pWInfo);
443 return;
444}