Performance improvements for LIKE. It is still too slow though. (CVS 1535)
FossilOrigin-Name: 30b81507fc404355751705c6f9856c178249eff1
diff --git a/src/expr.c b/src/expr.c
index dd2a8ce..36b421f 100644
--- a/src/expr.c
+++ b/src/expr.c
@@ -12,7 +12,7 @@
** This file contains routines used for analyzing expressions and
** for generating VDBE code that evaluates expressions in SQLite.
**
-** $Id: expr.c,v 1.135 2004/06/06 09:44:04 danielk1977 Exp $
+** $Id: expr.c,v 1.136 2004/06/06 12:41:50 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
@@ -1752,6 +1752,7 @@
pBest->nArg = nArg;
pBest->pNext = pFirst;
pBest->zName = (char*)&pBest[1];
+ pBest->iPrefEnc = eTextRep;
memcpy(pBest->zName, zName, nName);
pBest->zName[nName] = 0;
sqlite3HashInsert(&db->aFunc, pBest->zName, nName, (void*)pBest);
diff --git a/src/func.c b/src/func.c
index e6cd5d0..bf4d6d4 100644
--- a/src/func.c
+++ b/src/func.c
@@ -16,7 +16,7 @@
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
-** $Id: func.c,v 1.63 2004/06/06 09:44:04 danielk1977 Exp $
+** $Id: func.c,v 1.64 2004/06/06 12:41:50 danielk1977 Exp $
*/
#include <ctype.h>
#include <math.h>
@@ -418,7 +418,11 @@
if( pc_state<0 ){
aState[n].failstate = -1;
}else if( pc_state==n ){
- aState[n].failstate = pc_state;
+ if( c ){
+ aState[n].failstate = pc_state;
+ }else{
+ aState[n].failstate = -2;
+ }
}else{
int k = pLike->aState[n-1].failstate;
while( k>pc_state && aState[k+1].val!=-1 && aState[k+1].val!=c ){
@@ -461,13 +465,13 @@
int argc,
sqlite3_value **argv
){
- int s;
- int c;
- int nc;
+ register int c;
u8 enc;
int offset = 0;
const unsigned char *zString;
LikePattern *pLike = sqlite3_get_auxdata(context, 0);
+ struct LikeState *aState;
+ register struct LikeState *pState;
/* If either argument is NULL, the result is NULL */
if( sqlite3_value_type(argv[1])==SQLITE_NULL ||
@@ -479,6 +483,7 @@
if( sqlite3_user_data(context) ){
enc = TEXT_Utf16;
zString = (const unsigned char *)sqlite3_value_text16(argv[1]);
+ assert(0);
}else{
enc = TEXT_Utf8;
zString = sqlite3_value_text(argv[1]);
@@ -493,33 +498,39 @@
}
sqlite3_set_auxdata(context, 0, pLike, deleteLike);
}
+ aState = pLike->aState;
+ pState = aState;
- s = 0;
- nc = 1;
do {
- int val = pLike->aState[s].val;
- if( nc ) c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
+ if( enc==TEXT_Utf8 ){
+ c = zString[offset++];
+ if( c&0x80 ){
+ offset--;
+ c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
+ }
+ }else{
+ c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
+ }
+
+skip_read:
#if defined(TRACE_LIKE) && !defined(NDEBUG)
printf("State=%d:(%d, %d) Input=%d\n",
- s, pLike->aState[s].val,
- pLike->aState[s].failstate, c);
+ (aState - pState), pState->val, pState->failstate, c);
#endif
- if( val==-1 || val==c ){
- s++;
- nc = 1;
+ if( pState->val==-1 || pState->val==c ){
+ pState++;
}else{
- if( pLike->aState[s].failstate==s ){
- nc = 1;
- }else{
- nc = 0;
- s = pLike->aState[s].failstate;
+ struct LikeState *pFailState = &aState[pState->failstate];
+ if( pState!=pFailState ){
+ pState = pFailState;
+ if( c && pState>=aState ) goto skip_read;
}
}
- }while( c && s>=0 );
+ }while( c && pState>=aState );
- if( s==pLike->nState ){
+ if( (pState-aState)==pLike->nState || (pState-aState)<-1 ){
sqlite3_result_int(context, 1);
}else{
sqlite3_result_int(context, 0);
@@ -916,8 +927,8 @@
case 1: pArg = db; break;
case 2: pArg = (void *)(-1); break;
}
- sqlite3_create_function(db, aFuncs[i].zName, aFuncs[i].nArg, 0, 0,
- pArg, aFuncs[i].xFunc, 0, 0);
+ sqlite3_create_function(db, aFuncs[i].zName, aFuncs[i].nArg,
+ aFuncs[i].eTextRep, 0, pArg, aFuncs[i].xFunc, 0, 0);
}
for(i=0; i<sizeof(aAggs)/sizeof(aAggs[0]); i++){
void *pArg = 0;
diff --git a/src/utf.c b/src/utf.c
index 4da418b..9d1884b 100644
--- a/src/utf.c
+++ b/src/utf.c
@@ -12,7 +12,7 @@
** This file contains routines used to translate between UTF-8,
** UTF-16, UTF-16BE, and UTF-16LE.
**
-** $Id: utf.c,v 1.17 2004/06/06 09:44:05 danielk1977 Exp $
+** $Id: utf.c,v 1.18 2004/06/06 12:41:50 danielk1977 Exp $
**
** Notes on UTF-8:
**
@@ -117,6 +117,34 @@
switch( *pEnc ){
case TEXT_Utf8: {
+
+#if 0
+ static const int initVal[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 0, 1, 2,
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 254,
+ 255,
+ };
+ ret = initVal[(unsigned char)zStr[(*pOffset)++]];
+ while( (0xc0&zStr[*pOffset])==0x80 ){
+ ret = (ret<<6) | (0x3f&(zStr[(*pOffset)++]));
+ }
+#endif
+
struct Utf8TblRow {
u8 b1_mask;
u8 b1_masked_val;
@@ -153,7 +181,6 @@
}
ret = (ret<<6) + (u32)(b&0x3F);
}
-
break;
}
diff --git a/src/util.c b/src/util.c
index 637782d..bdfe54a 100644
--- a/src/util.c
+++ b/src/util.c
@@ -14,7 +14,7 @@
** This file contains functions for allocating memory, comparing
** strings, and stuff like that.
**
-** $Id: util.c,v 1.97 2004/06/06 09:44:05 danielk1977 Exp $
+** $Id: util.c,v 1.98 2004/06/06 12:41:50 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <stdarg.h>
@@ -917,7 +917,7 @@
** UCS character. This only works right if z points to a well-formed
** UTF-8 string.
*/
-static int sqlite3ReadUtf8(const unsigned char *z){
+int sqlite3ReadUtf8(const unsigned char *z){
int c;
static const int initVal[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,