Performance improvements for LIKE. It is still too slow though. (CVS 1535)

FossilOrigin-Name: 30b81507fc404355751705c6f9856c178249eff1
diff --git a/src/expr.c b/src/expr.c
index dd2a8ce..36b421f 100644
--- a/src/expr.c
+++ b/src/expr.c
@@ -12,7 +12,7 @@
 ** This file contains routines used for analyzing expressions and
 ** for generating VDBE code that evaluates expressions in SQLite.
 **
-** $Id: expr.c,v 1.135 2004/06/06 09:44:04 danielk1977 Exp $
+** $Id: expr.c,v 1.136 2004/06/06 12:41:50 danielk1977 Exp $
 */
 #include "sqliteInt.h"
 #include <ctype.h>
@@ -1752,6 +1752,7 @@
     pBest->nArg = nArg;
     pBest->pNext = pFirst;
     pBest->zName = (char*)&pBest[1];
+    pBest->iPrefEnc = eTextRep;
     memcpy(pBest->zName, zName, nName);
     pBest->zName[nName] = 0;
     sqlite3HashInsert(&db->aFunc, pBest->zName, nName, (void*)pBest);
diff --git a/src/func.c b/src/func.c
index e6cd5d0..bf4d6d4 100644
--- a/src/func.c
+++ b/src/func.c
@@ -16,7 +16,7 @@
 ** sqliteRegisterBuildinFunctions() found at the bottom of the file.
 ** All other code has file scope.
 **
-** $Id: func.c,v 1.63 2004/06/06 09:44:04 danielk1977 Exp $
+** $Id: func.c,v 1.64 2004/06/06 12:41:50 danielk1977 Exp $
 */
 #include <ctype.h>
 #include <math.h>
@@ -418,7 +418,11 @@
       if( pc_state<0 ){
         aState[n].failstate = -1;
       }else if( pc_state==n ){
-        aState[n].failstate = pc_state;
+        if( c ){
+          aState[n].failstate = pc_state;
+        }else{
+          aState[n].failstate = -2;
+        }
       }else{
         int k = pLike->aState[n-1].failstate;
         while( k>pc_state && aState[k+1].val!=-1 && aState[k+1].val!=c ){
@@ -461,13 +465,13 @@
   int argc, 
   sqlite3_value **argv
 ){
-  int s;
-  int c;
-  int nc;
+  register int c;
   u8 enc;
   int offset = 0;
   const unsigned char *zString;
   LikePattern *pLike = sqlite3_get_auxdata(context, 0); 
+  struct LikeState *aState;
+  register struct LikeState *pState;
 
   /* If either argument is NULL, the result is NULL */
   if( sqlite3_value_type(argv[1])==SQLITE_NULL || 
@@ -479,6 +483,7 @@
   if( sqlite3_user_data(context) ){
     enc = TEXT_Utf16;
     zString = (const unsigned char *)sqlite3_value_text16(argv[1]);
+    assert(0);
   }else{
     enc = TEXT_Utf8;
     zString = sqlite3_value_text(argv[1]);
@@ -493,33 +498,39 @@
     }
     sqlite3_set_auxdata(context, 0, pLike, deleteLike);
   }
+  aState = pLike->aState;
+  pState = aState;
 
-  s = 0;
-  nc = 1;
   do {
-    int val = pLike->aState[s].val;
-    if( nc ) c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
+    if( enc==TEXT_Utf8 ){
+      c = zString[offset++];
+      if( c&0x80 ){
+        offset--;
+        c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
+      }
+    }else{
+      c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
+    }
+
+skip_read:
 
 #if defined(TRACE_LIKE) && !defined(NDEBUG)
     printf("State=%d:(%d, %d) Input=%d\n", 
-        s, pLike->aState[s].val, 
-        pLike->aState[s].failstate, c);
+        (aState - pState), pState->val, pState->failstate, c);
 #endif
 
-    if( val==-1 || val==c ){
-      s++;
-      nc = 1;
+    if( pState->val==-1 || pState->val==c ){
+      pState++;
     }else{
-      if( pLike->aState[s].failstate==s ){
-        nc = 1;
-      }else{
-        nc = 0;
-        s = pLike->aState[s].failstate;
+      struct LikeState *pFailState = &aState[pState->failstate];
+      if( pState!=pFailState ){
+        pState = pFailState;
+        if( c && pState>=aState ) goto skip_read;
       }
     }
-  }while( c && s>=0 );
+  }while( c && pState>=aState );
 
-  if( s==pLike->nState ){
+  if( (pState-aState)==pLike->nState || (pState-aState)<-1 ){
     sqlite3_result_int(context, 1);
   }else{
     sqlite3_result_int(context, 0);
@@ -916,8 +927,8 @@
       case 1: pArg = db; break;
       case 2: pArg = (void *)(-1); break;
     }
-    sqlite3_create_function(db, aFuncs[i].zName, aFuncs[i].nArg, 0, 0,
-        pArg, aFuncs[i].xFunc, 0, 0);
+    sqlite3_create_function(db, aFuncs[i].zName, aFuncs[i].nArg,
+        aFuncs[i].eTextRep, 0, pArg, aFuncs[i].xFunc, 0, 0);
   }
   for(i=0; i<sizeof(aAggs)/sizeof(aAggs[0]); i++){
     void *pArg = 0;
diff --git a/src/utf.c b/src/utf.c
index 4da418b..9d1884b 100644
--- a/src/utf.c
+++ b/src/utf.c
@@ -12,7 +12,7 @@
 ** This file contains routines used to translate between UTF-8, 
 ** UTF-16, UTF-16BE, and UTF-16LE.
 **
-** $Id: utf.c,v 1.17 2004/06/06 09:44:05 danielk1977 Exp $
+** $Id: utf.c,v 1.18 2004/06/06 12:41:50 danielk1977 Exp $
 **
 ** Notes on UTF-8:
 **
@@ -117,6 +117,34 @@
 
   switch( *pEnc ){
     case TEXT_Utf8: {
+
+#if 0
+  static const int initVal[] = {
+      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+     60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+     75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+     90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
+    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,   0,   1,   2,
+      3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,
+     18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,   0,
+      1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
+      0,   1,   2,   3,   4,   5,   6,   7,   0,   1,   2,   3,   0,   1, 254,
+    255,
+  };
+  ret = initVal[(unsigned char)zStr[(*pOffset)++]];
+  while( (0xc0&zStr[*pOffset])==0x80 ){
+    ret = (ret<<6) | (0x3f&(zStr[(*pOffset)++]));
+  }
+#endif
+
       struct Utf8TblRow {
         u8 b1_mask;
         u8 b1_masked_val;
@@ -153,7 +181,6 @@
         }
         ret = (ret<<6) + (u32)(b&0x3F);
       }
-      
       break;
     }
 
diff --git a/src/util.c b/src/util.c
index 637782d..bdfe54a 100644
--- a/src/util.c
+++ b/src/util.c
@@ -14,7 +14,7 @@
 ** This file contains functions for allocating memory, comparing
 ** strings, and stuff like that.
 **
-** $Id: util.c,v 1.97 2004/06/06 09:44:05 danielk1977 Exp $
+** $Id: util.c,v 1.98 2004/06/06 12:41:50 danielk1977 Exp $
 */
 #include "sqliteInt.h"
 #include <stdarg.h>
@@ -917,7 +917,7 @@
 ** UCS character.  This only works right if z points to a well-formed
 ** UTF-8 string.
 */
-static int sqlite3ReadUtf8(const unsigned char *z){
+int sqlite3ReadUtf8(const unsigned char *z){
   int c;
   static const int initVal[] = {
       0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,