Enhance user function API to support association of meta-data with constant arguments and the specification of text encoding preference. The LIKE operator takes advantage of both. (CVS 1534) FossilOrigin-Name: 92337d8f79b9754cd61c73e7db2e792a1f482f50

commit: d02eb1fdf4b939e4065d13a64c7c38afda443826 [log] [tgz]
author: danielk1977 <danielk1977@noemail.net> Sun Jun 06 09:44:03 2004 +0000
committer: danielk1977 <danielk1977@noemail.net> Sun Jun 06 09:44:03 2004 +0000
tree: eaa8c797fe0e44eeb81ce6761c5b5bb2fe35ef8a
parent: 51c6d9633f52eb6d06b0291005d1a0b5fd552bd9 [diff]
diff --git a/src/expr.c b/src/expr.c
index b3ba9f4..dd2a8ce 100644
--- a/src/expr.c
+++ b/src/expr.c

@@ -12,7 +12,7 @@
 ** This file contains routines used for analyzing expressions and
 ** for generating VDBE code that evaluates expressions in SQLite.
 **
-** $Id: expr.c,v 1.134 2004/06/05 10:22:17 danielk1977 Exp $
+** $Id: expr.c,v 1.135 2004/06/06 09:44:04 danielk1977 Exp $
 */
 #include "sqliteInt.h"
 #include <ctype.h>
@@ -968,11 +968,12 @@
       int nId;                    /* Number of characters in function name */
       const char *zId;            /* The function name. */
       FuncDef *pDef;
+      int iPrefEnc = (pParse->db->enc==TEXT_Utf8)?0:1;
 
       getFunctionName(pExpr, &zId, &nId);
-      pDef = sqlite3FindFunction(pParse->db, zId, nId, n, 0);
+      pDef = sqlite3FindFunction(pParse->db, zId, nId, n, iPrefEnc, 0);
       if( pDef==0 ){
-        pDef = sqlite3FindFunction(pParse->db, zId, nId, -1, 0);
+        pDef = sqlite3FindFunction(pParse->db, zId, nId, -1, iPrefEnc, 0);
         if( pDef==0 ){
           no_such_func = 1;
         }else{
@@ -1233,12 +1234,15 @@
       const char *zId;
       int p2 = 0;
       int i;
+      int iPrefEnc = (pParse->db->enc==TEXT_Utf8)?0:1;
       getFunctionName(pExpr, &zId, &nId);
-      pDef = sqlite3FindFunction(pParse->db, zId, nId, nExpr, 0);
+      pDef = sqlite3FindFunction(pParse->db, zId, nId, nExpr, iPrefEnc, 0);
       assert( pDef!=0 );
       nExpr = sqlite3ExprCodeExprList(pParse, pList);
       for(i=0; i<nExpr && i<32; i++){
-        p2 &= (1<<i);
+        if( sqlite3ExprIsConstant(pList->a[i].pExpr) ){
+          p2 |= (1<<i);
+        }
       }
       sqlite3VdbeOp3(v, OP_Function, nExpr, p2, (char*)pDef, P3_FUNCDEF);
       break;
@@ -1645,13 +1649,14 @@
         }
       }
       if( i>=pParse->nAgg ){
+        int iPrefEnc = (pParse->db->enc==TEXT_Utf8)?0:1;
         i = appendAggInfo(pParse);
         if( i<0 ) return 1;
         pParse->aAgg[i].isAgg = 1;
         pParse->aAgg[i].pExpr = pExpr;
         pParse->aAgg[i].pFunc = sqlite3FindFunction(pParse->db,
              pExpr->token.z, pExpr->token.n,
-             pExpr->pList ? pExpr->pList->nExpr : 0, 0);
+             pExpr->pList ? pExpr->pList->nExpr : 0, iPrefEnc, 0);
       }
       pExpr->iAgg = i;
       break;
@@ -1677,9 +1682,10 @@
 }
 
 /*
-** Locate a user function given a name and a number of arguments.
-** Return a pointer to the FuncDef structure that defines that
-** function, or return NULL if the function does not exist.
+** Locate a user function given a name, a number of arguments and a flag
+** indicating whether the function prefers UTF-16 over UTF-8.  Return a
+** pointer to the FuncDef structure that defines that function, or return
+** NULL if the function does not exist.
 **
 ** If the createFlag argument is true, then a new (blank) FuncDef
 ** structure is created and liked into the "db" structure if a
@@ -1690,39 +1696,70 @@
 ** If createFlag is false and nArg is -1, then the first valid
 ** function found is returned.  A function is valid if either xFunc
 ** or xStep is non-zero.
+**
+** If createFlag is false, then a function with the required name and
+** number of arguments may be returned even if the eTextRep flag does not
+** match that requested.
 */
 FuncDef *sqlite3FindFunction(
   sqlite *db,        /* An open database */
   const char *zName, /* Name of the function.  Not null-terminated */
   int nName,         /* Number of characters in the name */
   int nArg,          /* Number of arguments.  -1 means any number */
+  int eTextRep,      /* True to retrieve UTF-16 versions. */
   int createFlag     /* Create new entry if true and does not otherwise exist */
 ){
-  FuncDef *pFirst, *p, *pMaybe;
-  pFirst = p = (FuncDef*)sqlite3HashFind(&db->aFunc, zName, nName);
-  if( p && !createFlag && nArg<0 ){
-    while( p && p->xFunc==0 && p->xStep==0 ){ p = p->pNext; }
-    return p;
+  FuncDef *p;         /* Iterator variable */
+  FuncDef *pFirst;    /* First function with this name */
+  FuncDef *pBest = 0; /* Best match found so far */
+  int matchqual = 0;  
+
+  /* Normalize argument values to simplify comparisons below. */
+  if( eTextRep ) eTextRep = 1;
+  if( nArg<-1 ) nArg = -1;
+
+  pFirst = (FuncDef*)sqlite3HashFind(&db->aFunc, zName, nName);
+  for(p=pFirst; p; p=p->pNext){
+    if( 1 || p->xFunc || p->xStep ){
+      if( p->nArg==nArg && p->iPrefEnc==eTextRep ){
+        /* A perfect match. */
+        pBest = p;
+        matchqual = 4;
+        break;
+      }
+      if( p->nArg==nArg ){
+        /* Number of arguments matches, but not the text encoding */
+        pBest = p;
+        matchqual = 3;
+      }
+      else if( (p->nArg<0) || (nArg<0) ){
+        if( matchqual<2 && p->iPrefEnc==eTextRep ){
+          /* Matched a varargs function with correct text encoding */
+          pBest = p;
+          matchqual = 2;
+        }
+        if( matchqual<1 ){
+          /* Matched a varargs function with incorrect text encoding */
+          pBest = p;
+          matchqual = 1;
+        }
+      }
+    }
   }
-  pMaybe = 0;
-  while( p && p->nArg!=nArg ){
-    if( p->nArg<0 && !createFlag && (p->xFunc || p->xStep) ) pMaybe = p;
-    p = p->pNext;
+
+  if( createFlag && matchqual<4 && 
+      (pBest = sqliteMalloc(sizeof(*pBest)+nName+1)) ){
+    pBest->nArg = nArg;
+    pBest->pNext = pFirst;
+    pBest->zName = (char*)&pBest[1];
+    memcpy(pBest->zName, zName, nName);
+    pBest->zName[nName] = 0;
+    sqlite3HashInsert(&db->aFunc, pBest->zName, nName, (void*)pBest);
   }
-  if( p && !createFlag && p->xFunc==0 && p->xStep==0 ){
-    return 0;
+
+  if( pBest && (pBest->xStep || pBest->xFunc || createFlag) ){
+    return pBest;
   }
-  if( p==0 && pMaybe ){
-    assert( createFlag==0 );
-    return pMaybe;
-  }
-  if( p==0 && createFlag && (p = sqliteMalloc(sizeof(*p)+nName+1))!=0 ){
-    p->nArg = nArg;
-    p->pNext = pFirst;
-    p->zName = (char*)&p[1];
-    memcpy(p->zName, zName, nName);
-    p->zName[nName] = 0;
-    sqlite3HashInsert(&db->aFunc, p->zName, nName, (void*)p);
-  }
-  return p;
+  return 0;
 }
+

diff --git a/src/func.c b/src/func.c
index 706bfcf..e6cd5d0 100644
--- a/src/func.c
+++ b/src/func.c

@@ -16,7 +16,7 @@
 ** sqliteRegisterBuildinFunctions() found at the bottom of the file.
 ** All other code has file scope.
 **
-** $Id: func.c,v 1.62 2004/05/31 18:51:58 drh Exp $
+** $Id: func.c,v 1.63 2004/06/06 09:44:04 danielk1977 Exp $
 */
 #include <ctype.h>
 #include <math.h>
@@ -293,23 +293,236 @@
 }
 
 /*
+** A LIKE pattern compiles to an instance of the following structure. Refer
+** to the comment for compileLike() function for details.
+*/
+struct LikePattern {
+  int nState;
+  struct LikeState {
+    int val;           /* Unicode codepoint or -1 for any char i.e. '_' */
+    int failstate;     /* State to jump to if next char is not val */
+  } aState[0];
+};
+typedef struct LikePattern LikePattern;
+
+void deleteLike(void *pLike){
+  sqliteFree(pLike);
+}
+
+
+/* #define TRACE_LIKE */
+
+#if defined(TRACE_LIKE) && !defined(NDEBUG)
+char *dumpLike(LikePattern *pLike){
+  int i;
+  int k = 0;
+  char *zBuf = (char *)sqliteMalloc(pLike->nState*40);
+  
+  k += sprintf(&zBuf[k], "%d states - ", pLike->nState);
+  for(i=0; i<pLike->nState; i++){
+    k += sprintf(&zBuf[k], " %d:(%d, %d)", i, pLike->aState[i].val,
+        pLike->aState[i].failstate);
+  }
+  return zBuf;
+}
+#endif
+
+/*
+** This function compiles an SQL 'LIKE' pattern into a state machine, 
+** represented by a LikePattern structure.
+**
+** Each state of the state-machine has two attributes, 'val' and
+** 'failstate'. The val attribute is either the value of a unicode 
+** codepoint, or -1, indicating a '_' wildcard (match any single
+** character). The failstate is either the number of another state
+** or -1, indicating jump to 'no match'.
+**
+** To see if a string matches a pattern the pattern is
+** compiled to a state machine that is executed according to the algorithm
+** below. The string is assumed to be terminated by a 'NUL' character
+** (unicode codepoint 0).
+**
+** 1   S = 0
+** 2   DO 
+** 3       C = <Next character from input string>
+** 4       IF( C matches <State S val> )
+** 5           S = S+1
+** 6       ELSE IF( S != <State S failstate> )
+** 7           S = <State S failstate>
+** 8           <Rewind Input string 1 character>
+** 9   WHILE( (C != NUL) AND (S != FAILED) )
+** 10
+** 11  IF( S == <number of states> )
+** 12      RETURN MATCH
+** 13  ELSE
+** 14      RETURN NO-MATCH
+**       
+** In practice there is a small optimization to avoid the <Rewind>
+** operation in line 8 of the description above.
+**
+** For example, the following pattern, 'X%ABabc%_Y' is compiled to
+** the state machine below.
+**
+** State    Val          FailState
+** -------------------------------
+** 0        120 (x)      -1 (NO MATCH)
+** 1        97  (a)      1
+** 2        98  (b)      1
+** 3        97  (a)      1
+** 4        98  (b)      2
+** 5        99  (c)      3
+** 6        -1  (_)      6
+** 7        121 (y)      7
+** 8        0   (NUL)    7
+**
+** The algorithms implemented to compile and execute the state machine were
+** first presented in "Fast pattern matching in strings", Knuth, Morris and
+** Pratt, 1977.
+**       
+*/
+LikePattern *compileLike(sqlite3_value *pPattern, u8 enc){
+  LikePattern *pLike;   /* The compiled pattern (return value) */
+  struct LikeState *aState;  /* Shorthand for pLike->aState */
+  int pc_state = -1;    /* State number of previous '%' wild card */
+  int n = 0;            /* Allocation size in states, then next state index */
+  int c;                /* Current (case-folded) pattern character */
+  int offset = 0;       /* Byte offset of next unread character in zLike */
+  const char *zLike;    /* Pattern text in encoding enc */
+ 
+  if( enc==TEXT_Utf8 ){
+    zLike = sqlite3_value_text(pPattern);
+    n = sqlite3_value_bytes(pPattern) + 1;
+  }else{
+    zLike = sqlite3_value_text16(pPattern);
+    n = sqlite3_value_bytes16(pPattern)/2 + 1;
+  }
+
+  pLike = (LikePattern *)
+      sqliteMalloc(sizeof(LikePattern)+n*sizeof(struct LikeState));
+  if( !pLike ) return 0;   /* Allocation failed; do not touch aState */
+  aState = pLike->aState;
+
+  n = 0;
+  do {
+    c = sqlite3ReadUniChar(zLike, &offset, &enc, 1);
+    if( c==95 ){        /* A '_' wildcard */
+      aState[n].val = -1;
+      n++;
+    }else if( c==37 ){  /* A '%' wildcard */
+      aState[n].failstate = n;
+      pc_state = n;
+    }else{              /* A regular character */
+      aState[n].val = c;
+
+      assert( pc_state<=n );
+      if( pc_state<0 ){
+        aState[n].failstate = -1;
+      }else if( pc_state==n ){
+        aState[n].failstate = pc_state;
+      }else{
+        int k = pLike->aState[n-1].failstate;
+        while( k>pc_state && aState[k+1].val!=-1 && aState[k+1].val!=c ){
+          k = aState[k].failstate;
+        }
+        if( k!=pc_state && aState[k+1].val==c ){
+          assert( k==pc_state );  /* NOTE(review): guard above has k!=pc_state */
+          k++;
+        }
+        aState[n].failstate = k;
+      }
+      n++;
+    }
+  }while( c );
+  pLike->nState = n;
+#if defined(TRACE_LIKE) && !defined(NDEBUG)
+  {
+    char *zCompiled = dumpLike(pLike);
+    printf("Pattern=\"%s\" Compiled=\"%s\"\n", zLike, zCompiled);
+    sqliteFree(zCompiled);
+  }
+#endif
+  return pLike;
+}
+
+/*
 ** Implementation of the like() SQL function.  This function implements
 ** the build-in LIKE operator.  The first argument to the function is the
-** string and the second argument is the pattern.  So, the SQL statements:
+** pattern and the second argument is the string.  So, the SQL statements:
 **
 **       A LIKE B
 **
-** is implemented as like(A,B).
+** is implemented as like(B,A).
+**
+** If the pointer retrieved via a call to sqlite3_user_data() is
+** not NULL, then this function uses UTF-16. Otherwise UTF-8.
 */
 static void likeFunc(
   sqlite3_context *context, 
   int argc, 
   sqlite3_value **argv
 ){
-  const unsigned char *zA = sqlite3_value_text(argv[0]);
-  const unsigned char *zB = sqlite3_value_text(argv[1]);
-  if( zA && zB ){
-    sqlite3_result_int(context, sqlite3LikeCompare(zA, zB));
+  int s;
+  int c;
+  int nc;
+  u8 enc;
+  int offset = 0;
+  const unsigned char *zString;
+  LikePattern *pLike = sqlite3_get_auxdata(context, 0); 
+
+  /* If either argument is NULL, the result is NULL */
+  if( sqlite3_value_type(argv[1])==SQLITE_NULL || 
+      sqlite3_value_type(argv[0])==SQLITE_NULL ){
+    return;
+  }
+
+  /* If the user-data pointer is NULL, use UTF-8. Otherwise UTF-16. */
+  if( sqlite3_user_data(context) ){
+    enc = TEXT_Utf16;
+    zString = (const unsigned char *)sqlite3_value_text16(argv[1]);
+  }else{
+    enc = TEXT_Utf8;
+    zString = sqlite3_value_text(argv[1]);
+  }
+
+  /* If the LIKE pattern has not been compiled, compile it now. */
+  if( !pLike ){
+    pLike = compileLike(argv[0], enc);
+    if( !pLike ){
+      sqlite3_result_error(context, "out of memory", -1);
+      return;
+    }
+    sqlite3_set_auxdata(context, 0, pLike, deleteLike);
+  }
+
+  s = 0;
+  nc = 1;
+  do {
+    int val = pLike->aState[s].val;
+    if( nc ) c = sqlite3ReadUniChar(zString, &offset, &enc, 1);
+
+#if defined(TRACE_LIKE) && !defined(NDEBUG)
+    printf("State=%d:(%d, %d) Input=%d\n", 
+        s, pLike->aState[s].val, 
+        pLike->aState[s].failstate, c);
+#endif
+
+    if( val==-1 || val==c ){
+      s++;
+      nc = 1;
+    }else{
+      if( pLike->aState[s].failstate==s ){
+        nc = 1;
+      }else{
+        nc = 0;
+        s = pLike->aState[s].failstate;
+      }
+    }
+  }while( c && s>=0 );
+
+  if( s==pLike->nState ){
+    sqlite3_result_int(context, 1);
+  }else{
+    sqlite3_result_int(context, 0);
   }
 }
 
@@ -642,39 +855,40 @@
      char *zName;
      signed char nArg;
      u8 argType;               /* 0: none.  1: db  2: (-1) */
+     u8 eTextRep;              /* 1: UTF-16.  0: UTF-8 */
      void (*xFunc)(sqlite3_context*,int,sqlite3_value **);
   } aFuncs[] = {
-    { "min",                        -1, 0, minmaxFunc },
-    { "min",                         0, 0, 0          },
-    { "max",                        -1, 2, minmaxFunc },
-    { "max",                         0, 2, 0          },
-    { "typeof",                      1, 0, typeofFunc },
-    { "classof",                     1, 0, typeofFunc }, /* FIX ME: hack */
-    { "length",                      1, 0, lengthFunc },
-    { "substr",                      3, 0, substrFunc },
-    { "abs",                         1, 0, absFunc    },
-    { "round",                       1, 0, roundFunc  },
-    { "round",                       2, 0, roundFunc  },
-    { "upper",                       1, 0, upperFunc  },
-    { "lower",                       1, 0, lowerFunc  },
-    { "coalesce",                   -1, 0, ifnullFunc },
-    { "coalesce",                    0, 0, 0          },
-    { "coalesce",                    1, 0, 0          },
-    { "ifnull",                      2, 0, ifnullFunc },
-    { "random",                     -1, 0, randomFunc },
-    { "like",                        2, 0, likeFunc   },
-    { "glob",                        2, 0, globFunc   },
-    { "nullif",                      2, 0, nullifFunc },
-    { "sqlite_version",              0, 0, versionFunc},
-    { "quote",                       1, 0, quoteFunc  },
-    { "last_insert_rowid",           0, 1, last_insert_rowid },
-    { "change_count",                0, 1, change_count      },
-    { "last_statement_change_count", 0, 1, last_statement_change_count },
+    { "min",                        -1, 0, 0, minmaxFunc },
+    { "min",                         0, 0, 0, 0          },
+    { "max",                        -1, 2, 0, minmaxFunc },
+    { "max",                         0, 2, 0, 0          },
+    { "typeof",                      1, 0, 0, typeofFunc },
+    { "length",                      1, 0, 0, lengthFunc },
+    { "substr",                      3, 0, 0, substrFunc },
+    { "abs",                         1, 0, 0, absFunc    },
+    { "round",                       1, 0, 0, roundFunc  },
+    { "round",                       2, 0, 0, roundFunc  },
+    { "upper",                       1, 0, 0, upperFunc  },
+    { "lower",                       1, 0, 0, lowerFunc  },
+    { "coalesce",                   -1, 0, 0, ifnullFunc },
+    { "coalesce",                    0, 0, 0, 0          },
+    { "coalesce",                    1, 0, 0, 0          },
+    { "ifnull",                      2, 0, 0, ifnullFunc },
+    { "random",                     -1, 0, 0, randomFunc },
+    { "like",                        2, 0, 0, likeFunc   }, /* UTF-8 */
+    { "like",                        2, 2, 1, likeFunc   }, /* UTF-16 */
+    { "glob",                        2, 0, 0, globFunc   },
+    { "nullif",                      2, 0, 0, nullifFunc },
+    { "sqlite_version",              0, 0, 0, versionFunc},
+    { "quote",                       1, 0, 0, quoteFunc  },
+    { "last_insert_rowid",           0, 1, 0, last_insert_rowid },
+    { "change_count",                0, 1, 0, change_count      },
+    { "last_statement_change_count", 0, 1, 0, last_statement_change_count },
 #ifdef SQLITE_SOUNDEX
-    { "soundex",                     1, 0, soundexFunc},
+    { "soundex",                     1, 0, 0, soundexFunc},
 #endif
 #ifdef SQLITE_TEST
-    { "randstr",                     2, 0, randStr    },
+    { "randstr",                     2, 0, 0, randStr    },
 #endif
   };
   static struct {

diff --git a/src/main.c b/src/main.c
index ce93f2e..c0dada0 100644
--- a/src/main.c
+++ b/src/main.c

@@ -14,7 +14,7 @@
 ** other files are for internal use by SQLite and should not be
 ** accessed by users of the library.
 **
-** $Id: main.c,v 1.204 2004/06/04 06:22:01 danielk1977 Exp $
+** $Id: main.c,v 1.205 2004/06/06 09:44:04 danielk1977 Exp $
 */
 #include "sqliteInt.h"
 #include "os.h"
@@ -656,7 +656,7 @@
     return SQLITE_ERROR;
   }
 
-  p = sqlite3FindFunction(db, zFunctionName, nName, nArg, 1);
+  p = sqlite3FindFunction(db, zFunctionName, nName, nArg, eTextRep, 1);
   if( p==0 ) return 1;
   p->xFunc = xFunc;
   p->xStep = xStep;

diff --git a/src/sqlite.h.in b/src/sqlite.h.in
index e710ada..0b6cd99 100644
--- a/src/sqlite.h.in
+++ b/src/sqlite.h.in

@@ -12,7 +12,7 @@
 ** This header file defines the interface that the SQLite library
 ** presents to client programs.
 **
-** @(#) $Id: sqlite.h.in,v 1.92 2004/06/05 10:22:18 danielk1977 Exp $
+** @(#) $Id: sqlite.h.in,v 1.93 2004/06/06 09:44:04 danielk1977 Exp $
 */
 #ifndef _SQLITE_H_
 #define _SQLITE_H_
@@ -835,6 +835,13 @@
 ** aggregate takes. If this parameter is negative, then the function or
 ** aggregate may take any number of arguments.
 **
+** If the fourth parameter is non-zero, this indicates that the function is
+** more likely to handle text in UTF-16 encoding than UTF-8. This does not
+** change the behaviour of the programming interface. However, if two
+** versions of the same function are registered, one with eTextRep non-zero
+** and the other zero, SQLite invokes the version likely to minimize
+** conversions between unicode encodings.
+**
 ** The seventh, eighth and ninth parameters, xFunc, xStep and xFinal, are
 ** pointers to user implemented C functions that implement the user
 ** function or aggregate. A scalar function requires an implementation of

diff --git a/src/sqliteInt.h b/src/sqliteInt.h
index aa95275..8d9815f 100644
--- a/src/sqliteInt.h
+++ b/src/sqliteInt.h

@@ -11,7 +11,7 @@
 *************************************************************************
 ** Internal interface definitions for SQLite.
 **
-** @(#) $Id: sqliteInt.h,v 1.268 2004/06/04 06:22:02 danielk1977 Exp $
+** @(#) $Id: sqliteInt.h,v 1.269 2004/06/06 09:44:05 danielk1977 Exp $
 */
 #include "config.h"
 #include "sqlite3.h"
@@ -1265,7 +1265,6 @@
 void sqlite3Vacuum(Parse*, Token*);
 int sqlite3RunVacuum(char**, sqlite*);
 int sqlite3GlobCompare(const unsigned char*,const unsigned char*);
-int sqlite3LikeCompare(const unsigned char*,const unsigned char*);
 char *sqlite3TableNameFromToken(Token*);
 int sqlite3ExprCheck(Parse*, Expr*, int, int*);
 int sqlite3ExprType(Expr*);
@@ -1297,7 +1296,7 @@
 SrcList *sqlite3SrcListDup(SrcList*);
 IdList *sqlite3IdListDup(IdList*);
 Select *sqlite3SelectDup(Select*);
-FuncDef *sqlite3FindFunction(sqlite*,const char*,int,int,int);
+FuncDef *sqlite3FindFunction(sqlite*,const char*,int,int,int,int);
 void sqlite3RegisterBuiltinFunctions(sqlite*);
 void sqlite3RegisterDateTimeFunctions(sqlite*);
 int sqlite3SafetyOn(sqlite*);
@@ -1373,3 +1372,4 @@
 void *sqlite3HexToBlob(const char *z);
 int sqlite3TwoPartName(Parse *, Token *, Token *, Token **);
 const char *sqlite3ErrStr(int);
+int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold);

diff --git a/src/utf.c b/src/utf.c
index 65dd05e..4da418b 100644
--- a/src/utf.c
+++ b/src/utf.c

@@ -12,7 +12,7 @@
 ** This file contains routines used to translate between UTF-8, 
 ** UTF-16, UTF-16BE, and UTF-16LE.
 **
-** $Id: utf.c,v 1.16 2004/06/02 00:29:24 danielk1977 Exp $
+** $Id: utf.c,v 1.17 2004/06/06 09:44:05 danielk1977 Exp $
 **
 ** Notes on UTF-8:
 **
@@ -75,6 +75,138 @@
 #define READ_16(pZ,big_endian) (big_endian?BE16(pZ):LE16(pZ))
 
 /*
+** The following macro, LOWERCASE(x), takes an integer representing a
+** unicode code point. The value returned is the same code point folded to
+** lower case, if applicable. SQLite currently understands the upper/lower
+** case relationship between the 26 characters used in the English
+** language only.
+**
+** This means that characters with umlauts etc. will not be folded
+** correctly (unless they are encoded as composite characters, which would
+** doubtless cause much trouble).
+*/
+#define LOWERCASE(x) ((x)<91?(int)(UpperToLower[(x)]):(x))
+static const unsigned char UpperToLower[91] = { /* identity except 'A'..'Z' */
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
+     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
+     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
+    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
+    122,
+};
+
+/*
+** The first parameter, zStr, points at a unicode string. This routine reads
+** a single character from it and returns its codepoint value, or the
+** replacement character U+FFFD if the encoding at that point is malformed.
+**
+** The value of *pEnc is the string encoding: TEXT_Utf8, TEXT_Utf16le or
+** TEXT_Utf16be. NOTE(review): *pEnc is only read by this routine and no
+** byte-order-mark is interpreted here; presumably callers deal with any
+** BOM themselves -- confirm before relying on it.
+**
+** The value of *pOffset is the byte-offset in zStr from which to begin
+** reading. It is advanced past the bytes consumed by this function.
+**
+** If the fourth parameter, fold, is non-zero, then codepoint values are
+** folded to lower-case before being returned. See comments for macro
+** LOWERCASE(x) for details.
+*/
+int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold){
+  int ret = 0;
+
+  switch( *pEnc ){
+    case TEXT_Utf8: {
+      struct Utf8TblRow {
+        u8 b1_mask;
+        u8 b1_masked_val;
+        u8 b1_value_mask;
+        int trailing_bytes;
+      };
+      static const struct Utf8TblRow utf8tbl[] = {
+        { 0x80, 0x00, 0x7F, 0 },
+        { 0xE0, 0xC0, 0x1F, 1 },
+        { 0xF0, 0xE0, 0x0F, 2 },
+        { 0xF8, 0xF0, 0x07, 3 },   /* 11110xxx: three payload bits */
+        { 0, 0, 0, 0}
+      };
+    
+      u8 b1;   /* First byte of the potentially multi-byte utf-8 character */
+      int ii;
+      struct Utf8TblRow const *pRow = &(utf8tbl[0]);
+    
+      b1 = zStr[(*pOffset)++];
+      while( pRow->b1_mask && (b1&pRow->b1_mask)!=pRow->b1_masked_val ){
+        pRow++;
+      }
+      if( !pRow->b1_mask ){
+        return (int)0xFFFD;
+      }
+      
+      ret = (u32)(b1&pRow->b1_value_mask);
+      for( ii=0; ii<pRow->trailing_bytes; ii++ ){
+        u8 b = zStr[*pOffset];  /* peek: never step over a NUL terminator */
+        if( (b&0xC0)!=0x80 ){
+          return (int)0xFFFD;
+        }
+        (*pOffset)++;
+        ret = (ret<<6) + (u32)(b&0x3F);
+      }
+      
+      break;
+    }
+
+    case TEXT_Utf16le:
+    case TEXT_Utf16be: {
+      u32 code_point;   /* the first code-point in the character */
+      u32 code_point2;  /* the second code-point in the character, if any */
+    
+      code_point = READ_16(&zStr[*pOffset], (*pEnc==TEXT_Utf16be));
+      *pOffset += 2;
+    
+      /* If this is a non-surrogate code-point, just cast it to an int and
+      ** this is the code-point value.
+      */
+      if( code_point<0xD800 || code_point>0xDFFF ){
+        ret = code_point;
+        break;
+      }
+
+      /* If this is a trailing surrogate code-point, then the string is
+      ** malformed; return the replacement character.
+      */
+      if( code_point>0xDBFF ){
+        return (int)0xFFFD;
+      }
+    
+      /* The code-point just read is a leading surrogate code-point. If the
+      ** next code-point is not a trailing surrogate, return the replacement
+      ** character without consuming it (it may be the NUL terminator).
+      */
+      code_point2 = READ_16(&zStr[*pOffset], (*pEnc==TEXT_Utf16be));
+      if( code_point2<0xDC00 || code_point2>0xDFFF ){
+        return (int)0xFFFD;
+      }
+      *pOffset += 2;
+   
+      ret = ( 
+          (((code_point&0x03C0)+0x0040)<<10) +   /* uuuuu */
+          ((code_point&0x003F)<<10) +            /* xxxxxx */
+          (code_point2&0x03FF)                   /* yy yyyyyyyy */
+      );
+      break;
+    }
+    default:
+      assert(0);
+  }
+
+  if( fold ){
+    return LOWERCASE(ret);
+  }
+  return ret;
+}
+
+/*
 ** Read the BOM from the start of *pStr, if one is present. Return zero
 ** for little-endian, non-zero for big-endian. If no BOM is present, return
 ** the value of the parameter "big_endian".
@@ -133,47 +265,8 @@
 ** strings, the unicode replacement character U+FFFD may be returned.
 */
 static u32 readUtf8(UtfString *pStr){
-  struct Utf8TblRow {
-    u8 b1_mask;
-    u8 b1_masked_val;
-    u8 b1_value_mask;
-    int trailing_bytes;
-  };
-  static const struct Utf8TblRow utf8tbl[] = {
-    { 0x80, 0x00, 0x7F, 0 },
-    { 0xE0, 0xC0, 0x1F, 1 },
-    { 0xF0, 0xE0, 0x0F, 2 },
-    { 0xF8, 0xF0, 0x0E, 3 },
-    { 0, 0, 0, 0}
-  };
-
-  u8 b1;       /* First byte of the potentially multi-byte utf-8 character */
-  u32 ret = 0; /* Return value */
-  int ii;
-  struct Utf8TblRow const *pRow;
-
-  pRow = &(utf8tbl[0]);
-
-  b1 = pStr->pZ[pStr->c];
-  pStr->c++;
-  while( pRow->b1_mask && (b1&pRow->b1_mask)!=pRow->b1_masked_val ){
-    pRow++;
-  }
-  if( !pRow->b1_mask ){
-    return 0xFFFD;
-  }
-  
-  ret = (u32)(b1&pRow->b1_value_mask);
-  for( ii=0; ii<pRow->trailing_bytes; ii++ ){
-    u8 b = pStr->pZ[pStr->c+ii];
-    if( (b&0xC0)!=0x80 ){
-      return 0xFFFD;
-    }
-    ret = (ret<<6) + (u32)(b&0x3F);
-  }
-  
-  pStr->c += pRow->trailing_bytes;
-  return ret;
+  u8 enc = TEXT_Utf8;
+  return sqlite3ReadUniChar(pStr->pZ, &pStr->c, &enc, 0);
 }
 
 /*

diff --git a/src/util.c b/src/util.c
index 00f20f6..637782d 100644
--- a/src/util.c
+++ b/src/util.c

@@ -14,7 +14,7 @@
 ** This file contains functions for allocating memory, comparing
 ** strings, and stuff like that.
 **
-** $Id: util.c,v 1.96 2004/06/02 00:41:10 drh Exp $
+** $Id: util.c,v 1.97 2004/06/06 09:44:05 danielk1977 Exp $
 */
 #include "sqliteInt.h"
 #include <stdarg.h>
@@ -1056,57 +1056,6 @@
 }
 
 /*
-** Compare two UTF-8 strings for equality using the "LIKE" operator of
-** SQL.  The '%' character matches any sequence of 0 or more
-** characters and '_' matches any single character.  Case is
-** not significant.
-**
-** This routine is just an adaptation of the sqlite3GlobCompare()
-** routine above.
-*/
-int 
-sqlite3LikeCompare(const unsigned char *zPattern, const unsigned char *zString){
-  register int c;
-  int c2;
-
-  while( (c = UpperToLower[*zPattern])!=0 ){
-    switch( c ){
-      case '%': {
-        while( (c=zPattern[1]) == '%' || c == '_' ){
-          if( c=='_' ){
-            if( *zString==0 ) return 0;
-            sqliteNextChar(zString);
-          }
-          zPattern++;
-        }
-        if( c==0 ) return 1;
-        c = UpperToLower[c];
-        while( (c2=UpperToLower[*zString])!=0 ){
-          while( c2 != 0 && c2 != c ){ c2 = UpperToLower[*++zString]; }
-          if( c2==0 ) return 0;
-          if( sqlite3LikeCompare(&zPattern[1],zString) ) return 1;
-          sqliteNextChar(zString);
-        }
-        return 0;
-      }
-      case '_': {
-        if( *zString==0 ) return 0;
-        sqliteNextChar(zString);
-        zPattern++;
-        break;
-      }
-      default: {
-        if( c != UpperToLower[*zString] ) return 0;
-        zPattern++;
-        zString++;
-        break;
-      }
-    }
-  }
-  return *zString==0;
-}
-
-/*
 ** Change the sqlite.magic from SQLITE_MAGIC_OPEN to SQLITE_MAGIC_BUSY.
 ** Return an error (non-zero) if the magic was not SQLITE_MAGIC_OPEN
 ** when this routine is called.

diff --git a/src/vdbe.c b/src/vdbe.c
index 29ac27c..848d7cc 100644
--- a/src/vdbe.c
+++ b/src/vdbe.c

@@ -43,7 +43,7 @@
 ** in this file for details.  If in doubt, do not deviate from existing
 ** commenting and indentation practices when changing or adding code.
 **
-** $Id: vdbe.c,v 1.358 2004/06/05 10:22:18 danielk1977 Exp $
+** $Id: vdbe.c,v 1.359 2004/06/06 09:44:05 danielk1977 Exp $
 */
 #include "sqliteInt.h"
 #include "os.h"
@@ -1273,7 +1273,7 @@
   */
   if( ctx.pVdbeFunc ){
     int mask = pOp->p2;
-    for(i=0; i<n; i++){
+    for(i=0; i<ctx.pVdbeFunc->nAux; i++){
       struct AuxData *pAux = &ctx.pVdbeFunc->apAux[i];
       if( (i>31 || !(mask&(1<<i))) && pAux->pAux ){
         pAux->xDelete(pAux->pAux);

diff --git a/src/vdbeapi.c b/src/vdbeapi.c
index 1e40fad..ac7d976 100644
--- a/src/vdbeapi.c
+++ b/src/vdbeapi.c

@@ -248,6 +248,7 @@
     pCtx->pVdbeFunc = sqliteRealloc(pCtx->pVdbeFunc, nMalloc);
     if( !pCtx->pVdbeFunc ) return;
     pCtx->pVdbeFunc->nAux = iArg+1;
+    pCtx->pVdbeFunc->pFunc = pCtx->pFunc;
   }
 
   pAuxData = &pCtx->pVdbeFunc->apAux[iArg];

diff --git a/src/vdbeaux.c b/src/vdbeaux.c
index d3a982a..e17a1cb 100644
--- a/src/vdbeaux.c
+++ b/src/vdbeaux.c

@@ -1235,9 +1235,10 @@
       sqliteFree(pOp->p3);
     }
     if( pOp->p3type==P3_VDBEFUNC ){
+      int j;
       VdbeFunc *pVdbeFunc = (VdbeFunc *)pOp->p3;
-      for(i=0; i<pVdbeFunc->nAux; i++){
-        struct AuxData *pAuxData = &pVdbeFunc->apAux[i].pAux;
+      for(j=0; j<pVdbeFunc->nAux; j++){
+        struct AuxData *pAuxData = &pVdbeFunc->apAux[j].pAux;
         if( pAuxData->pAux && pAuxData->xDelete ){
           pAuxData->xDelete(pAuxData->pAux);
         }
commit	d02eb1fdf4b939e4065d13a64c7c38afda443826	[log] [tgz]
author	danielk1977 <danielk1977@noemail.net>	Sun Jun 06 09:44:03 2004 +0000
committer	danielk1977 <danielk1977@noemail.net>	Sun Jun 06 09:44:03 2004 +0000
tree	eaa8c797fe0e44eeb81ce6761c5b5bb2fe35ef8a
parent	51c6d9633f52eb6d06b0291005d1a0b5fd552bd9 [diff]