The case_sensitive_like pragma added.
Test cases added for the LIKE optimization. (CVS 2592)

FossilOrigin-Name: 72ee21c05e618b6f46f5460f8c85779c72fe32d7
diff --git a/src/callback.c b/src/callback.c
index 1abf826..8b585a1 100644
--- a/src/callback.c
+++ b/src/callback.c
@@ -13,7 +13,7 @@
 ** This file contains functions used to access the internal hash tables
 ** of user defined functions and collation sequences.
 **
-** $Id: callback.c,v 1.2 2005/05/25 10:45:10 danielk1977 Exp $
+** $Id: callback.c,v 1.3 2005/08/14 01:20:38 drh Exp $
 */
 
 #include "sqliteInt.h"
@@ -147,7 +147,7 @@
 ** the collation sequence name. A pointer to this string is stored in
 ** each collation sequence structure.
 */
-static CollSeq * findCollSeqEntry(
+static CollSeq *findCollSeqEntry(
   sqlite3 *db,
   const char *zName,
   int nName,
@@ -286,10 +286,9 @@
   ** new entry to the hash table and return it.
   */
   if( createFlag && bestmatch<6 && 
-      (pBest = sqliteMalloc(sizeof(*pBest)+nName+1)) ){
+      (pBest = sqliteMalloc(sizeof(*pBest)+nName)) ){
     pBest->nArg = nArg;
     pBest->pNext = pFirst;
-    pBest->zName = (char*)&pBest[1];
     pBest->iPrefEnc = enc;
     memcpy(pBest->zName, zName, nName);
     pBest->zName[nName] = 0;
diff --git a/src/func.c b/src/func.c
index c58bf21..43ee458 100644
--- a/src/func.c
+++ b/src/func.c
@@ -16,7 +16,7 @@
 ** sqliteRegisterBuildinFunctions() found at the bottom of the file.
 ** All other code has file scope.
 **
-** $Id: func.c,v 1.103 2005/08/13 03:07:47 drh Exp $
+** $Id: func.c,v 1.104 2005/08/14 01:20:38 drh Exp $
 */
 #include "sqliteInt.h"
 #include <ctype.h>
@@ -26,6 +26,9 @@
 #include "vdbeInt.h"
 #include "os.h"
 
+/*
+** Return the collating function associated with a function.
+*/
 static CollSeq *sqlite3GetFuncCollSeq(sqlite3_context *context){
   return context->pColl;
 }
@@ -308,16 +311,14 @@
   u8 matchSet;
   u8 noCase;
 };
+
 static const struct compareInfo globInfo = { '*', '?', '[', 0 };
-#ifndef SQLITE_CASE_SENSITIVE_LIKE
-  /* The correct SQL-92 behavior is for the LIKE operator to ignore
-  ** case.  Thus  'a' LIKE 'A' would be true. */
-  static const struct compareInfo likeInfo = { '%', '_',   0, 1 };
-#else
-  /* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
-  ** is case sensitive causing 'a' LIKE 'A' to be false */
-  static const struct compareInfo likeInfo = { '%', '_',   0, 0 };
-#endif
+/* The correct SQL-92 behavior is for the LIKE operator to ignore
+** case.  Thus  'a' LIKE 'A' would be true. */
+static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
+/* If SQLITE_CASE_SENSITIVE_LIKE is defined, or the case_sensitive_like
+** pragma is turned on, LIKE is case sensitive so 'a' LIKE 'A' is false */
+static const struct compareInfo likeInfoAlt = { '%', '_',   0, 0 };
 
 /*
 ** X is a pointer to the first byte of a UTF-8 character.  Increment
@@ -459,6 +460,15 @@
   return *zString==0;
 }
 
+/*
+** Count the number of times that the LIKE operator (or GLOB which is
+** just a variation of LIKE) gets called.  This is used for testing
+** only.
+*/
+#ifdef SQLITE_TEST
+int sqlite3_like_count = 0;
+#endif
+
 
 /*
 ** Implementation of the like() SQL function.  This function implements
@@ -469,8 +479,8 @@
 **
 ** is implemented as like(B,A).
 **
-** If the pointer retrieved by via a call to sqlite3_user_data() is
-** not NULL, then this function uses UTF-16. Otherwise UTF-8.
+** This same function (with a different compareInfo structure) computes
+** the GLOB operator.
 */
 static void likeFunc(
   sqlite3_context *context, 
@@ -493,24 +503,11 @@
     escape = sqlite3ReadUtf8(zEsc);
   }
   if( zA && zB ){
-    sqlite3_result_int(context, patternCompare(zA, zB, &likeInfo, escape));
-  }
-}
-
-/*
-** Implementation of the glob() SQL function.  This function implements
-** the build-in GLOB operator.  The first argument to the function is the
-** string and the second argument is the pattern.  So, the SQL statements:
-**
-**       A GLOB B
-**
-** is implemented as glob(B,A).
-*/
-static void globFunc(sqlite3_context *context, int arg, sqlite3_value **argv){
-  const unsigned char *zA = sqlite3_value_text(argv[0]);
-  const unsigned char *zB = sqlite3_value_text(argv[1]);
-  if( zA && zB ){
-    sqlite3_result_int(context, patternCompare(zA, zB, &globInfo, 0));
+    struct compareInfo *pInfo = sqlite3_user_data(context);
+#ifdef SQLITE_TEST
+    sqlite3_like_count++;
+#endif
+    sqlite3_result_int(context, patternCompare(zA, zB, pInfo, escape));
   }
 }
 
@@ -971,9 +968,6 @@
     { "coalesce",           1, 0, SQLITE_UTF8,    0, 0          },
     { "ifnull",             2, 0, SQLITE_UTF8,    1, ifnullFunc },
     { "random",            -1, 0, SQLITE_UTF8,    0, randomFunc },
-    { "like",               2, 0, SQLITE_UTF8,    0, likeFunc   },
-    { "like",               3, 0, SQLITE_UTF8,    0, likeFunc   },
-    { "glob",               2, 0, SQLITE_UTF8,    0, globFunc   },
     { "nullif",             2, 0, SQLITE_UTF8,    1, nullifFunc },
     { "sqlite_version",     0, 0, SQLITE_UTF8,    0, versionFunc},
     { "quote",              1, 0, SQLITE_UTF8,    0, quoteFunc  },
@@ -1045,8 +1039,77 @@
   }
   sqlite3RegisterDateTimeFunctions(db);
 #ifdef SQLITE_SSE
-  {
-    sqlite3SseFunctions(db);
-  }
+  sqlite3SseFunctions(db);
 #endif
+#ifdef SQLITE_CASE_SENSITIVE_LIKE
+  sqlite3RegisterLikeFunctions(db, 1);
+#else
+  sqlite3RegisterLikeFunctions(db, 0);
+#endif
+}
+
+/*
+** Flag the 2-argument function zName, if found, for the LIKE optimization.
+*/
+static void setLikeOptFlag(sqlite3 *db, const char *zName){
+  int nName = strlen(zName);
+  FuncDef *pFunc = sqlite3FindFunction(db, zName, nName, 2, SQLITE_UTF8, 0);
+  if( pFunc!=0 ){
+    pFunc->flags = SQLITE_FUNC_LIKEOPT;
+  }
+}
+
+/*
+** Install the built-in LIKE and GLOB SQL functions on connection db.
+** A non-zero caseSensitive selects the case-sensitive LIKE comparison
+** rules; zero selects the case-insensitive rules.  GLOB always uses
+** its own comparison rules.  GLOB is always flagged for the LIKE
+** optimization; LIKE is flagged only when it is case sensitive.
+*/
+void sqlite3RegisterLikeFunctions(sqlite3 *db, int caseSensitive){
+  /* The const qualifier is cast away because the user-data argument
+  ** of sqlite3_create_function() is a plain void pointer. */
+  void *pLike = (void*)(caseSensitive ? &likeInfoAlt : &likeInfoNorm);
+  void *pGlob = (void*)&globInfo;
+
+  sqlite3_create_function(db, "like", 2, SQLITE_UTF8, pLike, likeFunc, 0, 0);
+  sqlite3_create_function(db, "like", 3, SQLITE_UTF8, pLike, likeFunc, 0, 0);
+  sqlite3_create_function(db, "glob", 2, SQLITE_UTF8, pGlob, likeFunc, 0, 0);
+  setLikeOptFlag(db, "glob");
+  if( caseSensitive ){
+    setLikeOptFlag(db, "like");
+  }
+}
+
+/*
+** pExpr points to an expression which implements a function.  If
+** it is appropriate to apply the LIKE optimization to that function
+** then set aWc[0] through aWc[2] to the wildcard characters and
+** return TRUE.  If the function is not a LIKE-style function then
+** return FALSE.  aWc must have room for at least three characters.
+*/
+int sqlite3IsLikeFunction(sqlite3 *db, Expr *pExpr, char *aWc){
+  FuncDef *pDef;
+  /* Only a function call that actually carries an argument list can
+  ** qualify.  Test pList before it is dereferenced below. */
+  if( pExpr->op!=TK_FUNCTION || pExpr->pList==0 ){
+    return 0;
+  }
+  if( pExpr->pList->nExpr!=2 ) return 0;
+  pDef = sqlite3FindFunction(db, pExpr->token.z, pExpr->token.n, 2,
+                             SQLITE_UTF8, 0);
+  if( pDef==0 || (pDef->flags & SQLITE_FUNC_LIKEOPT)==0 ){
+    return 0;
+  }
+
+  /* The memcpy() statement assumes that the wildcard characters are
+  ** the first three members of the compareInfo structure.  The
+  ** asserts() that follow verify that assumption.
+  */
+  memcpy(aWc, pDef->pUserData, 3);
+  assert( (char*)&likeInfoAlt == (char*)&likeInfoAlt.matchAll );
+  assert( &((char*)&likeInfoAlt)[1] == (char*)&likeInfoAlt.matchOne );
+  assert( &((char*)&likeInfoAlt)[2] == (char*)&likeInfoAlt.matchSet );
+
+  return 1;
 }
diff --git a/src/main.c b/src/main.c
index 23dcd24..a84f94f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -14,7 +14,7 @@
 ** other files are for internal use by SQLite and should not be
 ** accessed by users of the library.
 **
-** $Id: main.c,v 1.297 2005/08/11 02:10:19 drh Exp $
+** $Id: main.c,v 1.298 2005/08/14 01:20:39 drh Exp $
 */
 #include "sqliteInt.h"
 #include "os.h"
@@ -471,6 +471,7 @@
 
   p = sqlite3FindFunction(db, zFunctionName, nName, nArg, enc, 1);
   if( p==0 ) return SQLITE_NOMEM;
+  p->flags = 0;
   p->xFunc = xFunc;
   p->xStep = xStep;
   p->xFinalize = xFinal;
diff --git a/src/pragma.c b/src/pragma.c
index c12a4f8..bac2177 100644
--- a/src/pragma.c
+++ b/src/pragma.c
@@ -11,7 +11,7 @@
 *************************************************************************
 ** This file contains code used to implement the PRAGMA command.
 **
-** $Id: pragma.c,v 1.97 2005/08/13 00:56:27 drh Exp $
+** $Id: pragma.c,v 1.98 2005/08/14 01:20:39 drh Exp $
 */
 #include "sqliteInt.h"
 #include "os.h"
@@ -603,14 +603,25 @@
 #ifndef NDEBUG
   if( sqlite3StrICmp(zLeft, "parser_trace")==0 ){
     extern void sqlite3ParserTrace(FILE*, char *);
-    if( getBoolean(zRight) ){
-      sqlite3ParserTrace(stderr, "parser: ");
-    }else{
-      sqlite3ParserTrace(0, 0);
+    if( zRight ){
+      if( getBoolean(zRight) ){
+        sqlite3ParserTrace(stderr, "parser: ");
+      }else{
+        sqlite3ParserTrace(0, 0);
+      }
     }
   }else
 #endif
 
+  /* Reinstall the LIKE and GLOB functions.  The variant of LIKE
+  ** used will be case sensitive or not depending on the RHS.
+  */
+  if( sqlite3StrICmp(zLeft, "case_sensitive_like")==0 ){
+    if( zRight ){
+      sqlite3RegisterLikeFunctions(db, getBoolean(zRight));
+    }
+  }else
+
 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
   if( sqlite3StrICmp(zLeft, "integrity_check")==0 ){
     int i, j, addr;
diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index 9cfce8a..1d6f78b 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h
@@ -11,7 +11,7 @@
 *************************************************************************
 ** Internal interface definitions for SQLite.
 **
-** @(#) $Id: sqliteInt.h,v 1.401 2005/08/12 22:56:09 drh Exp $
+** @(#) $Id: sqliteInt.h,v 1.402 2005/08/14 01:20:39 drh Exp $
 */
 #ifndef _SQLITEINT_H_
 #define _SQLITEINT_H_
@@ -496,18 +496,24 @@
 ** points to a linked list of these structures.
 */
 struct FuncDef {
-  char *zName;         /* SQL name of the function */
-  int nArg;            /* Number of arguments.  -1 means unlimited */
+  i16 nArg;            /* Number of arguments.  -1 means unlimited */
   u8 iPrefEnc;         /* Preferred text encoding (SQLITE_UTF8, 16LE, 16BE) */
+  u8 needCollSeq;      /* True if sqlite3GetFuncCollSeq() might be called */
+  u8 flags;            /* Some combination of SQLITE_FUNC_* */
   void *pUserData;     /* User data parameter */
   FuncDef *pNext;      /* Next function with same name */
   void (*xFunc)(sqlite3_context*,int,sqlite3_value**); /* Regular function */
   void (*xStep)(sqlite3_context*,int,sqlite3_value**); /* Aggregate step */
   void (*xFinalize)(sqlite3_context*);                /* Aggregate finializer */
-  u8 needCollSeq;      /* True if sqlite3GetFuncCollSeq() might be called */
+  char zName[1];       /* SQL name of the function.  MUST BE LAST */
 };
 
 /*
+** Possible values for FuncDef.flags
+*/
+#define SQLITE_FUNC_LIKEOPT  0x01    /* Candidate for the LIKE optimization */
+
+/*
 ** information about each column of an SQL table is held in an instance
 ** of this structure.
 */
@@ -1576,6 +1582,8 @@
 int sqlite3FindDb(sqlite3*, Token*);
 void sqlite3AnalysisLoad(sqlite3*,int iDB);
 void sqlite3DefaultRowEst(Index*);
+void sqlite3RegisterLikeFunctions(sqlite3*, int);
+int sqlite3IsLikeFunction(sqlite3*,Expr*,char*);
 
 #ifdef SQLITE_SSE
 #include "sseInt.h"
diff --git a/src/test1.c b/src/test1.c
index aaa3910..b08333e 100644
--- a/src/test1.c
+++ b/src/test1.c
@@ -13,7 +13,7 @@
 ** is not included in the SQLite library.  It is used for automated
 ** testing of the SQLite library.
 **
-** $Id: test1.c,v 1.153 2005/08/11 02:10:19 drh Exp $
+** $Id: test1.c,v 1.154 2005/08/14 01:20:39 drh Exp $
 */
 #include "sqliteInt.h"
 #include "tcl.h"
@@ -2797,6 +2797,12 @@
   Tcl_SetVar2(interp,"sqlite_options","default_autovacuum","1",TCL_GLOBAL_ONLY);
 #endif
 
+#ifdef SQLITE_OMIT_BETWEEN_OPTIMIZATION
+  Tcl_SetVar2(interp, "sqlite_options", "between_opt", "0", TCL_GLOBAL_ONLY);
+#else
+  Tcl_SetVar2(interp, "sqlite_options", "between_opt", "1", TCL_GLOBAL_ONLY);
+#endif
+
 #ifdef SQLITE_OMIT_BLOB_LITERAL
   Tcl_SetVar2(interp, "sqlite_options", "bloblit", "0", TCL_GLOBAL_ONLY);
 #else
@@ -2869,12 +2875,24 @@
   Tcl_SetVar2(interp, "sqlite_options", "integrityck", "1", TCL_GLOBAL_ONLY);
 #endif
 
+#ifdef SQLITE_OMIT_LIKE_OPTIMIZATION
+  Tcl_SetVar2(interp, "sqlite_options", "like_opt", "0", TCL_GLOBAL_ONLY);
+#else
+  Tcl_SetVar2(interp, "sqlite_options", "like_opt", "1", TCL_GLOBAL_ONLY);
+#endif
+
 #ifdef SQLITE_OMIT_MEMORYDB
   Tcl_SetVar2(interp, "sqlite_options", "memorydb", "0", TCL_GLOBAL_ONLY);
 #else
   Tcl_SetVar2(interp, "sqlite_options", "memorydb", "1", TCL_GLOBAL_ONLY);
 #endif
 
+#ifdef SQLITE_OMIT_OR_OPTIMIZATION
+  Tcl_SetVar2(interp, "sqlite_options", "or_opt", "0", TCL_GLOBAL_ONLY);
+#else
+  Tcl_SetVar2(interp, "sqlite_options", "or_opt", "1", TCL_GLOBAL_ONLY);
+#endif
+
 #ifdef SQLITE_OMIT_PAGER_PRAGMAS
   Tcl_SetVar2(interp, "sqlite_options", "pager_pragmas", "0", TCL_GLOBAL_ONLY);
 #else
@@ -3094,6 +3112,7 @@
   extern int sqlite3_memUsed;
   extern int sqlite3_memMax;
   extern char sqlite3_query_plan[];
+  extern int sqlite3_like_count;
   static char *query_plan = sqlite3_query_plan;
 
   for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
@@ -3107,6 +3126,8 @@
       (char*)&sqlite3_search_count, TCL_LINK_INT);
   Tcl_LinkVar(interp, "sqlite_sort_count", 
       (char*)&sqlite3_sort_count, TCL_LINK_INT);
+  Tcl_LinkVar(interp, "sqlite_like_count", 
+      (char*)&sqlite3_like_count, TCL_LINK_INT);
   Tcl_LinkVar(interp, "sqlite_interrupt_count", 
       (char*)&sqlite3_interrupt_count, TCL_LINK_INT);
   Tcl_LinkVar(interp, "sqlite_open_file_count", 
diff --git a/src/vdbeaux.c b/src/vdbeaux.c
index b3fa8c3..cf81cfe 100644
--- a/src/vdbeaux.c
+++ b/src/vdbeaux.c
@@ -109,6 +109,7 @@
   pOp->p2 = p2;
   pOp->p3 = 0;
   pOp->p3type = P3_NOTUSED;
+  p->expired = 0;
 #ifdef SQLITE_DEBUG
   if( sqlite3_vdbe_addop_trace ) sqlite3VdbePrintOp(0, i, &p->aOp[i]);
 #endif
diff --git a/src/where.c b/src/where.c
index c340626..e98363d 100644
--- a/src/where.c
+++ b/src/where.c
@@ -16,7 +16,7 @@
 ** so is applicable.  Because this module is responsible for selecting
 ** indices, you might also think of this module as the "query optimizer".
 **
-** $Id: where.c,v 1.161 2005/08/13 16:13:05 drh Exp $
+** $Id: where.c,v 1.162 2005/08/14 01:20:39 drh Exp $
 */
 #include "sqliteInt.h"
 
@@ -467,54 +467,35 @@
 ** literal that does not begin with a wildcard.  
 */
 static int isLikeOrGlob(
+  sqlite3 *db,      /* The database */
   Expr *pExpr,      /* Test this expression */
   int *pnPattern,   /* Number of non-wildcard prefix characters */
   int *pisComplete  /* True if the only wildcard is % in the last character */
 ){
   const char *z;
   Expr *pRight, *pLeft;
+  ExprList *pList;
   int c, cnt;
-  char wc1, wc2, wc3;
-  if( pExpr->op!=TK_FUNCTION ){
+  char wc[3];
+  if( !sqlite3IsLikeFunction(db, pExpr, wc) ){
     return 0;
   }
-  if( pExpr->pList->nExpr!=2 ){
-    return 0;
-  }
-  if( pExpr->token.n!=4 ){
-    return 0;
-  }
-  z = pExpr->token.z;
-  if( sqlite3StrNICmp(z, "glob", 4)==0 ){
-    wc1 = '*';
-    wc2 = '?';
-    wc3 = '[';
-  }
-#ifdef SQLITE_CASE_SENSITIVE_LIKE
-  else if( sqlite3StrNICmp(z, "like", 4)==0 ){
-    wc1 = '%';
-    wc2 = '_';
-    wc3 = '_';
-  }
-#endif
-  else{
-    return 0;
-  }
-  pRight = pExpr->pList->a[0].pExpr;
+  pList = pExpr->pList;
+  pRight = pList->a[0].pExpr;
   if( pRight->op!=TK_STRING ){
     return 0;
   }
-  pLeft = pExpr->pList->a[1].pExpr;
+  pLeft = pList->a[1].pExpr;
   if( pLeft->op!=TK_COLUMN ){
     return 0;
   }
   sqlite3DequoteExpr(pRight);
   z = pRight->token.z;
-  for(cnt=0; (c=z[cnt])!=0 && c!=wc1 && c!=wc2 && c!=wc3; cnt++){}
+  for(cnt=0; (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2]; cnt++){}
   if( cnt==0 || 255==(u8)z[cnt] ){
     return 0;
   }
-  *pisComplete = z[cnt]==wc1 && z[cnt+1]==0;
+  *pisComplete = z[cnt]==wc[0] && z[cnt+1]==0;
   *pnPattern = cnt;
   return 1;
 }
@@ -671,7 +652,7 @@
   /* Add constraints to reduce the search space on a LIKE or GLOB
   ** operator.
   */
-  if( isLikeOrGlob(pExpr, &nPattern, &isComplete) ){
+  if( isLikeOrGlob(pTerm->pWC->pParse->db, pExpr, &nPattern, &isComplete) ){
     Expr *pLeft, *pRight;
     Expr *pStr1, *pStr2;
     Expr *pNewExpr1, *pNewExpr2;