blob: 5c52f5f6520edee9ca0c6a35e1175ac6ae3eaa15 [file] [log] [blame]
danielk1977a3f06592009-04-23 14:58:39 +00001/*
2** 2005 December 14
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12**
shanea3628d12009-04-29 18:11:59 +000013** $Id: sqlite3async.c,v 1.5 2009/04/29 18:12:00 shane Exp $
danielk1977a3f06592009-04-23 14:58:39 +000014**
danielk1977debcfd22009-04-24 09:27:16 +000015** This file contains the implementation of an asynchronous IO backend
16** for SQLite.
danielk1977a3f06592009-04-23 14:58:39 +000017*/
18
19#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO)
20
21#include "sqlite3async.h"
shanea3628d12009-04-29 18:11:59 +000022#include "sqliteInt.h"
danielk19774598b8e2009-04-24 10:13:05 +000023#include <stdarg.h>
24#include <string.h>
25#include <assert.h>
danielk1977a3f06592009-04-23 14:58:39 +000026
danielk1977a3f06592009-04-23 14:58:39 +000027/* Useful macros used in several places */
28#define MIN(x,y) ((x)<(y)?(x):(y))
29#define MAX(x,y) ((x)>(y)?(x):(y))
30
31/* Forward references */
32typedef struct AsyncWrite AsyncWrite;
33typedef struct AsyncFile AsyncFile;
34typedef struct AsyncFileData AsyncFileData;
35typedef struct AsyncFileLock AsyncFileLock;
36typedef struct AsyncLock AsyncLock;
37
38/* Enable for debugging */
danielk19774598b8e2009-04-24 10:13:05 +000039#ifndef NDEBUG
40#include <stdio.h>
static int sqlite3async_trace = 0;
# define ASYNC_TRACE(X) if( sqlite3async_trace ) asyncTrace X

/*
** Format a printf()-style trace message and write it to stderr. This
** function is only compiled when NDEBUG is not defined, and it is only
** reached through the ASYNC_TRACE() macro while sqlite3async_trace is
** non-zero.
**
** sqlite3_vmprintf() returns NULL if it cannot allocate space for the
** formatted string. In that case the message is dropped instead of
** handing a NULL pointer to the "%s" conversion of fprintf().
*/
static void asyncTrace(const char *zFormat, ...){
  char *z;
  va_list ap;
  va_start(ap, zFormat);
  z = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
  if( z ){
    fprintf(stderr, "[%d] %s", 0 /* (int)pthread_self() */, z);
    sqlite3_free(z);
  }
}
shanea3628d12009-04-29 18:11:59 +000052#else
53# define ASYNC_TRACE(X)
danielk19774598b8e2009-04-24 10:13:05 +000054#endif
danielk1977a3f06592009-04-23 14:58:39 +000055
56/*
57** THREAD SAFETY NOTES
58**
59** Basic rules:
60**
61** * Both read and write access to the global write-op queue must be
62** protected by the async.queueMutex. As are the async.ioError and
63** async.nFile variables.
64**
65** * The async.pLock list and all AsyncLock and AsyncFileLock
66** structures must be protected by the async.lockMutex mutex.
67**
68** * The file handles from the underlying system are not assumed to
69** be thread safe.
70**
71** * See the last two paragraphs under "The Writer Thread" for
72** an assumption to do with file-handle synchronization by the Os.
73**
74** Deadlock prevention:
75**
**   There are three mutexes used by the system: the "writer" mutex,
**   the "queue" mutex and the "lock" mutex. Rules are:
**
**     * It is illegal to block on the writer mutex when any other mutex
**       is held, and
**
**     * It is illegal to block on the queue mutex when the lock mutex
**       is held.
**
**   i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
86**
87** File system operations (invoked by SQLite thread):
88**
89** xOpen
90** xDelete
91** xFileExists
92**
93** File handle operations (invoked by SQLite thread):
94**
95** asyncWrite, asyncClose, asyncTruncate, asyncSync
96**
97** The operations above add an entry to the global write-op list. They
98** prepare the entry, acquire the async.queueMutex momentarily while
99** list pointers are manipulated to insert the new entry, then release
100** the mutex and signal the writer thread to wake up in case it happens
101** to be asleep.
102**
103**
104** asyncRead, asyncFileSize.
105**
**     Read operations. Both of these read from the underlying file
**     first, then adjust their result based on pending writes in the
**     write-op queue. So async.queueMutex is held for the duration
**     of these operations to prevent other threads from changing the
**     queue in mid operation.
111**
112**
113** asyncLock, asyncUnlock, asyncCheckReservedLock
114**
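**     These primitives implement in-process locking using a list of
**     AsyncLock structures (async.pLock) that is searched by file name.
**     Files are locked correctly for connections coming from the same
**     process. Unless file-system locks are also being taken through
**     AsyncLock.pFile (see async.bLockFiles), other processes cannot see
**     these locks and will therefore not honor them.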
119**
120**
121** The writer thread:
122**
**     The async.writerMutex is used to make sure that there is only
**     a single writer thread running at a time.
125**
126** Inside the writer thread is a loop that works like this:
127**
128** WHILE (write-op list is not empty)
129** Do IO operation at head of write-op list
130** Remove entry from head of write-op list
131** END WHILE
132**
133** The async.queueMutex is always held during the <write-op list is
134** not empty> test, and when the entry is removed from the head
135** of the write-op list. Sometimes it is held for the interim
136** period (while the IO is performed), and sometimes it is
137** relinquished. It is relinquished if (a) the IO op is an
138** ASYNC_CLOSE or (b) when the file handle was opened, two of
139** the underlying systems handles were opened on the same
140** file-system entry.
141**
142** If condition (b) above is true, then one file-handle
143** (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
144** file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
145** threads to perform write() operations. This means that read
146** operations are not blocked by asynchronous writes (although
147** asynchronous writes may still be blocked by reads).
148**
149** This assumes that the OS keeps two handles open on the same file
150** properly in sync. That is, any read operation that starts after a
151** write operation on the same file system entry has completed returns
152** data consistent with the write. We also assume that if one thread
153** reads a file while another is writing it all bytes other than the
154** ones actually being written contain valid data.
155**
156** If the above assumptions are not true, set the preprocessor symbol
157** SQLITE_ASYNC_TWO_FILEHANDLES to 0.
158*/
159
160
161#ifndef NDEBUG
162# define TESTONLY( X ) X
163#else
164# define TESTONLY( X )
165#endif
166
167/*
danielk1977debcfd22009-04-24 09:27:16 +0000168** PORTING FUNCTIONS
169**
danielk1977a3f06592009-04-23 14:58:39 +0000170** There are two definitions of the following functions. One for pthreads
171** compatible systems and one for Win32. These functions isolate the OS
172** specific code required by each platform.
173**
174** The system uses three mutexes and a single condition variable. To
175** block on a mutex, async_mutex_enter() is called. The parameter passed
176** to async_mutex_enter(), which must be one of ASYNC_MUTEX_LOCK,
177** ASYNC_MUTEX_QUEUE or ASYNC_MUTEX_WRITER, identifies which of the three
178** mutexes to lock. Similarly, to unlock a mutex, async_mutex_leave() is
179** called with a parameter identifying the mutex being unlocked. Mutexes
180** are not recursive - it is an error to call async_mutex_enter() to
181** lock a mutex that is already locked, or to call async_mutex_leave()
182** to unlock a mutex that is not currently locked.
183**
184** The async_cond_wait() and async_cond_signal() functions are modelled
185** on the pthreads functions with similar names. The first parameter to
186** both functions is always ASYNC_COND_QUEUE. When async_cond_wait()
187** is called the mutex identified by the second parameter must be held.
188** The mutex is unlocked, and the calling thread simultaneously begins
189** waiting for the condition variable to be signalled by another thread.
190** After another thread signals the condition variable, the calling
191** thread stops waiting, locks mutex eMutex and returns. The
192** async_cond_signal() function is used to signal the condition variable.
193** It is assumed that the mutex used by the thread calling async_cond_wait()
194** is held by the caller of async_cond_signal() (otherwise there would be
195** a race condition).
196**
197** It is guaranteed that no other thread will call async_cond_wait() when
198** there is already a thread waiting on the condition variable.
199**
** The async_sched_yield() function is called to suggest to the operating
** system that it would be a good time to shift the current thread off the
** CPU. The system will still work if this function is not implemented
** (the pthreads version calls sched_yield(), the win32 version calls
** Sleep(0)), but it might be marginally more efficient if it is.
205*/
/* Mutex primitives: eMutex is one of the ASYNC_MUTEX_xxx values. */
static void async_mutex_enter(int eMutex);
static void async_mutex_leave(int eMutex);

/* Condition variable primitives: eCond is always ASYNC_COND_QUEUE. */
static void async_cond_wait(int eCond, int eMutex);
static void async_cond_signal(int eCond);

/* Scheduling hint; takes no arguments and returns nothing. */
static void async_sched_yield(void);
211
212/*
** There are also two definitions of the following. async_os_initialize()
** is called when the asynchronous VFS is first installed, and
** async_os_shutdown() is called when it is uninstalled (from within
** sqlite3async_shutdown()).
216**
217** For pthreads builds, both of these functions are no-ops. For win32,
218** they provide an opportunity to initialize and finalize the required
219** mutex and condition variables.
220**
221** If async_os_initialize() returns other than zero, then the initialization
222** fails and SQLITE_ERROR is returned to the user.
223*/
static int async_os_initialize(void);   /* Returns 0 on success, non-zero on failure */
static void async_os_shutdown(void);    /* Counterpart of async_os_initialize() */
226
227/* Values for use as the 'eMutex' argument of the above functions. The
228** integer values assigned to these constants are important for assert()
229** statements that verify that mutexes are locked in the correct order.
230** Specifically, it is unsafe to try to lock mutex N while holding a lock
231** on mutex M if (M<=N).
232*/
233#define ASYNC_MUTEX_LOCK 0
234#define ASYNC_MUTEX_QUEUE 1
235#define ASYNC_MUTEX_WRITER 2
236
237/* Values for use as the 'eCond' argument of the above functions. */
238#define ASYNC_COND_QUEUE 0
239
240/*************************************************************************
241** Start of OS specific code.
242*/
243#if SQLITE_OS_WIN || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__)
244
245/* The following block contains the win32 specific code. */
246
247#define mutex_held(X) (GetCurrentThreadId()==primitives.aHolder[X])
248
249static struct AsyncPrimitives {
250 int isInit;
251 DWORD aHolder[3];
252 CRITICAL_SECTION aMutex[3];
253 HANDLE aCond[1];
254} primitives = { 0 };
255
256static int async_os_initialize(void){
257 if( !primitives.isInit ){
258 primitives.aCond[0] = CreateEvent(NULL, TRUE, FALSE, 0);
259 if( primitives.aCond[0]==NULL ){
260 return 1;
261 }
262 InitializeCriticalSection(&primitives.aMutex[0]);
263 InitializeCriticalSection(&primitives.aMutex[1]);
264 InitializeCriticalSection(&primitives.aMutex[2]);
265 primitives.isInit = 1;
266 }
267 return 0;
268}
269static void async_os_shutdown(void){
270 if( primitives.isInit ){
271 DeleteCriticalSection(&primitives.aMutex[0]);
272 DeleteCriticalSection(&primitives.aMutex[1]);
273 DeleteCriticalSection(&primitives.aMutex[2]);
274 CloseHandle(primitives.aCond[0]);
275 primitives.isInit = 0;
276 }
277}
278
279/* The following block contains the Win32 specific code. */
280static void async_mutex_enter(int eMutex){
281 assert( eMutex==0 || eMutex==1 || eMutex==2 );
282 assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
283 assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
284 assert( eMutex!=0 || (!mutex_held(0)) );
285 EnterCriticalSection(&primitives.aMutex[eMutex]);
286 TESTONLY( primitives.aHolder[eMutex] = GetCurrentThreadId(); )
287}
288static void async_mutex_leave(int eMutex){
289 assert( eMutex==0 || eMutex==1 || eMutex==2 );
290 assert( mutex_held(eMutex) );
291 TESTONLY( primitives.aHolder[eMutex] = 0; )
292 LeaveCriticalSection(&primitives.aMutex[eMutex]);
293}
294static void async_cond_wait(int eCond, int eMutex){
295 ResetEvent(primitives.aCond[eCond]);
296 async_mutex_leave(eMutex);
297 WaitForSingleObject(primitives.aCond[eCond], INFINITE);
298 async_mutex_enter(eMutex);
299}
300static void async_cond_signal(int eCond){
301 assert( mutex_held(ASYNC_MUTEX_QUEUE) );
302 SetEvent(primitives.aCond[eCond]);
303}
/*
** Win32: yield the processor. Sleep() with a zero timeout is used as the
** counterpart of sched_yield(); per the Win32 documentation it gives up
** the remainder of the time slice to another thread that is ready to run.
*/
static void async_sched_yield(void){
  Sleep(0);
}
308#else
309
310/* The following block contains the pthreads specific code. */
311#include <pthread.h>
312#include <sched.h>
313
314#define mutex_held(X) pthread_equal(primitives.aHolder[X], pthread_self())
315
/* pthreads: nothing to set up at run time. Always returns 0. */
static int async_os_initialize(void){
  return 0;
}
/* pthreads: nothing to tear down. */
static void async_os_shutdown(void){
  return;
}
318
/*
** pthreads synchronization objects used by this file, all initialized
** statically. aMutex[] and aHolder[] are indexed by the ASYNC_MUTEX_xxx
** constants and aCond[] by ASYNC_COND_xxx. aHolder[] is only written
** inside TESTONLY() and only read by the mutex_held() macro.
*/
static struct AsyncPrimitives {
  pthread_mutex_t aMutex[3];     /* The three mutexes */
  pthread_cond_t aCond[1];       /* The queue condition variable */
  pthread_t aHolder[3];          /* Thread recorded as holding each mutex */
} primitives = {
  /* aMutex[] */
  { PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER },
  /* aCond[] */
  { PTHREAD_COND_INITIALIZER },
  /* aHolder[] */
  { 0, 0, 0 }
};
331
332static void async_mutex_enter(int eMutex){
333 assert( eMutex==0 || eMutex==1 || eMutex==2 );
334 assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
335 assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
336 assert( eMutex!=0 || (!mutex_held(0)) );
337 pthread_mutex_lock(&primitives.aMutex[eMutex]);
338 TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
339}
340static void async_mutex_leave(int eMutex){
341 assert( eMutex==0 || eMutex==1 || eMutex==2 );
342 assert( mutex_held(eMutex) );
343 TESTONLY( primitives.aHolder[eMutex] = 0; )
344 pthread_mutex_unlock(&primitives.aMutex[eMutex]);
345}
346static void async_cond_wait(int eCond, int eMutex){
347 assert( eMutex==0 || eMutex==1 || eMutex==2 );
348 assert( mutex_held(eMutex) );
349 TESTONLY( primitives.aHolder[eMutex] = 0; )
350 pthread_cond_wait(&primitives.aCond[eCond], &primitives.aMutex[eMutex]);
351 TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
352}
353static void async_cond_signal(int eCond){
354 assert( mutex_held(ASYNC_MUTEX_QUEUE) );
355 pthread_cond_signal(&primitives.aCond[eCond]);
356}
/* pthreads: yield the processor by calling sched_yield(). */
static void async_sched_yield(void){
  (void)sched_yield();
}
360#endif
361/*
362** End of OS specific code.
363*************************************************************************/
364
365#define assert_mutex_is_held(X) assert( mutex_held(X) )
366
367
368#ifndef SQLITE_ASYNC_TWO_FILEHANDLES
369/* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
370#define SQLITE_ASYNC_TWO_FILEHANDLES 1
371#endif
372
373/*
374** State information is held in the static variable "async" defined
375** as the following structure.
376**
377** Both async.ioError and async.nFile are protected by async.queueMutex.
378*/
379static struct TestAsyncStaticData {
380 AsyncWrite *pQueueFirst; /* Next write operation to be processed */
381 AsyncWrite *pQueueLast; /* Last write operation on the list */
382 AsyncLock *pLock; /* Linked list of all AsyncLock structures */
383 volatile int ioDelay; /* Extra delay between write operations */
384 volatile int eHalt; /* One of the SQLITEASYNC_HALT_XXX values */
danielk19774598b8e2009-04-24 10:13:05 +0000385 volatile int bLockFiles; /* Current value of "lockfiles" parameter */
danielk1977a3f06592009-04-23 14:58:39 +0000386 int ioError; /* True if an IO error has occurred */
387 int nFile; /* Number of open files (from sqlite pov) */
danielk19774598b8e2009-04-24 10:13:05 +0000388} async = { 0,0,0,0,0,1,0,0 };
danielk1977a3f06592009-04-23 14:58:39 +0000389
390/* Possible values of AsyncWrite.op */
391#define ASYNC_NOOP 0
392#define ASYNC_WRITE 1
393#define ASYNC_SYNC 2
394#define ASYNC_TRUNCATE 3
395#define ASYNC_CLOSE 4
396#define ASYNC_DELETE 5
397#define ASYNC_OPENEXCLUSIVE 6
398#define ASYNC_UNLOCK 7
399
400/* Names of opcodes. Used for debugging only.
401** Make sure these stay in sync with the macros above!
402*/
static const char *azOpcodeName[] = {
  "NOOP",        /* ASYNC_NOOP */
  "WRITE",       /* ASYNC_WRITE */
  "SYNC",        /* ASYNC_SYNC */
  "TRUNCATE",    /* ASYNC_TRUNCATE */
  "CLOSE",       /* ASYNC_CLOSE */
  "DELETE",      /* ASYNC_DELETE */
  "OPENEX",      /* ASYNC_OPENEXCLUSIVE */
  "UNLOCK"       /* ASYNC_UNLOCK */
};
406
407/*
408** Entries on the write-op queue are instances of the AsyncWrite
409** structure, defined here.
410**
411** The interpretation of the iOffset and nByte variables varies depending
412** on the value of AsyncWrite.op:
413**
414** ASYNC_NOOP:
415** No values used.
416**
417** ASYNC_WRITE:
418** iOffset -> Offset in file to write to.
419** nByte -> Number of bytes of data to write (pointed to by zBuf).
420**
421** ASYNC_SYNC:
422** nByte -> flags to pass to sqlite3OsSync().
423**
424** ASYNC_TRUNCATE:
425** iOffset -> Size to truncate file to.
426** nByte -> Unused.
427**
428** ASYNC_CLOSE:
429** iOffset -> Unused.
430** nByte -> Unused.
431**
432** ASYNC_DELETE:
433** iOffset -> Contains the "syncDir" flag.
434** nByte -> Number of bytes of zBuf points to (file name).
435**
436** ASYNC_OPENEXCLUSIVE:
437** iOffset -> Value of "delflag".
438** nByte -> Number of bytes of zBuf points to (file name).
439**
440** ASYNC_UNLOCK:
441** nByte -> Argument to sqlite3OsUnlock().
442**
443**
444** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
445** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
446** single blob, so is deleted when sqlite3_free() is called on the parent
447** structure.
448*/
449struct AsyncWrite {
450 AsyncFileData *pFileData; /* File to write data to or sync */
451 int op; /* One of ASYNC_xxx etc. */
452 sqlite_int64 iOffset; /* See above */
453 int nByte; /* See above */
454 char *zBuf; /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
455 AsyncWrite *pNext; /* Next write operation (to any file) */
456};
457
458/*
** An instance of this structure is created for each distinct open file
** (i.e. if two handles are opened on the one file, only one of these
** structures is allocated) and linked into the list headed by
** async.pLock. Entries are identified by the full pathname of the
** opened file (see findLock()).
463**
464** AsyncLock.pList points to the head of a linked list of AsyncFileLock
465** structures, one for each handle currently open on the file.
466**
467** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
danielk19774598b8e2009-04-24 10:13:05 +0000468** not passed to the sqlite3OsOpen() call), or if async.bLockFiles is
469** false, variables AsyncLock.pFile and AsyncLock.eLock are never used.
470** Otherwise, pFile is a file handle opened on the file in question and
471** used to obtain the file-system locks required by database connections
472** within this process.
danielk1977a3f06592009-04-23 14:58:39 +0000473**
474** See comments above the asyncLock() function for more details on
475** the implementation of database locking used by this backend.
476*/
477struct AsyncLock {
478 char *zFile;
479 int nFile;
480 sqlite3_file *pFile;
481 int eLock;
482 AsyncFileLock *pList;
483 AsyncLock *pNext; /* Next in linked list headed by async.pLock */
484};
485
486/*
487** An instance of the following structure is allocated along with each
488** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
489** file was opened with the SQLITE_OPEN_MAIN_DB.
490*/
491struct AsyncFileLock {
492 int eLock; /* Internally visible lock state (sqlite pov) */
493 int eAsyncLock; /* Lock-state with write-queue unlock */
494 AsyncFileLock *pNext;
495};
496
497/*
498** The AsyncFile structure is a subclass of sqlite3_file used for
499** asynchronous IO.
500**
501** All of the actual data for the structure is stored in the structure
502** pointed to by AsyncFile.pData, which is allocated as part of the
503** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
504** lifetime of the AsyncFile structure is ended by the caller after OsClose()
505** is called, but the data in AsyncFileData may be required by the
506** writer thread after that point.
507*/
508struct AsyncFile {
509 sqlite3_io_methods *pMethod;
510 AsyncFileData *pData;
511};
512struct AsyncFileData {
513 char *zName; /* Underlying OS filename - used for debugging */
514 int nName; /* Number of characters in zName */
515 sqlite3_file *pBaseRead; /* Read handle to the underlying Os file */
516 sqlite3_file *pBaseWrite; /* Write handle to the underlying Os file */
517 AsyncFileLock lock; /* Lock state for this handle */
518 AsyncLock *pLock; /* AsyncLock object for this file system entry */
519 AsyncWrite closeOp; /* Preallocated close operation */
520};
521
522/*
523** Add an entry to the end of the global write-op list. pWrite should point
524** to an AsyncWrite structure allocated using sqlite3_malloc(). The writer
525** thread will call sqlite3_free() to free the structure after the specified
526** operation has been completed.
527**
528** Once an AsyncWrite structure has been added to the list, it becomes the
529** property of the writer thread and must not be read or modified by the
530** caller.
531*/
532static void addAsyncWrite(AsyncWrite *pWrite){
533 /* We must hold the queue mutex in order to modify the queue pointers */
534 if( pWrite->op!=ASYNC_UNLOCK ){
535 async_mutex_enter(ASYNC_MUTEX_QUEUE);
536 }
537
538 /* Add the record to the end of the write-op queue */
539 assert( !pWrite->pNext );
540 if( async.pQueueLast ){
541 assert( async.pQueueFirst );
542 async.pQueueLast->pNext = pWrite;
543 }else{
544 async.pQueueFirst = pWrite;
545 }
546 async.pQueueLast = pWrite;
547 ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
548 pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
549
550 if( pWrite->op==ASYNC_CLOSE ){
551 async.nFile--;
552 }
553
554 /* The writer thread might have been idle because there was nothing
555 ** on the write-op queue for it to do. So wake it up. */
556 async_cond_signal(ASYNC_COND_QUEUE);
557
558 /* Drop the queue mutex */
559 if( pWrite->op!=ASYNC_UNLOCK ){
560 async_mutex_leave(ASYNC_MUTEX_QUEUE);
561 }
562}
563
564/*
565** Increment async.nFile in a thread-safe manner.
566*/
567static void incrOpenFileCount(void){
568 /* We must hold the queue mutex in order to modify async.nFile */
569 async_mutex_enter(ASYNC_MUTEX_QUEUE);
570 if( async.nFile==0 ){
571 async.ioError = SQLITE_OK;
572 }
573 async.nFile++;
574 async_mutex_leave(ASYNC_MUTEX_QUEUE);
575}
576
577/*
578** This is a utility function to allocate and populate a new AsyncWrite
579** structure and insert it (via addAsyncWrite() ) into the global list.
580*/
581static int addNewAsyncWrite(
582 AsyncFileData *pFileData,
583 int op,
584 sqlite3_int64 iOffset,
585 int nByte,
586 const char *zByte
587){
588 AsyncWrite *p;
589 if( op!=ASYNC_CLOSE && async.ioError ){
590 return async.ioError;
591 }
592 p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
593 if( !p ){
594 /* The upper layer does not expect operations like OsWrite() to
595 ** return SQLITE_NOMEM. This is partly because under normal conditions
596 ** SQLite is required to do rollback without calling malloc(). So
597 ** if malloc() fails here, treat it as an I/O error. The above
598 ** layer knows how to handle that.
599 */
600 return SQLITE_IOERR;
601 }
602 p->op = op;
603 p->iOffset = iOffset;
604 p->nByte = nByte;
605 p->pFileData = pFileData;
606 p->pNext = 0;
607 if( zByte ){
608 p->zBuf = (char *)&p[1];
609 memcpy(p->zBuf, zByte, nByte);
610 }else{
611 p->zBuf = 0;
612 }
613 addAsyncWrite(p);
614 return SQLITE_OK;
615}
616
617/*
618** Close the file. This just adds an entry to the write-op list, the file is
619** not actually closed.
620*/
621static int asyncClose(sqlite3_file *pFile){
622 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
623
624 /* Unlock the file, if it is locked */
625 async_mutex_enter(ASYNC_MUTEX_LOCK);
626 p->lock.eLock = 0;
627 async_mutex_leave(ASYNC_MUTEX_LOCK);
628
629 addAsyncWrite(&p->closeOp);
630 return SQLITE_OK;
631}
632
633/*
634** Implementation of sqlite3OsWrite() for asynchronous files. Instead of
635** writing to the underlying file, this function adds an entry to the end of
636** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be
637** returned.
638*/
639static int asyncWrite(
640 sqlite3_file *pFile,
641 const void *pBuf,
642 int amt,
643 sqlite3_int64 iOff
644){
645 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
646 return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
647}
648
649/*
650** Read data from the file. First we read from the filesystem, then adjust
651** the contents of the buffer based on ASYNC_WRITE operations in the
652** write-op queue.
653**
654** This method holds the mutex from start to finish.
655*/
656static int asyncRead(
657 sqlite3_file *pFile,
658 void *zOut,
659 int iAmt,
660 sqlite3_int64 iOffset
661){
662 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
663 int rc = SQLITE_OK;
664 sqlite3_int64 filesize;
665 int nRead;
666 sqlite3_file *pBase = p->pBaseRead;
667
668 /* Grab the write queue mutex for the duration of the call */
669 async_mutex_enter(ASYNC_MUTEX_QUEUE);
670
671 /* If an I/O error has previously occurred in this virtual file
672 ** system, then all subsequent operations fail.
673 */
674 if( async.ioError!=SQLITE_OK ){
675 rc = async.ioError;
676 goto asyncread_out;
677 }
678
679 if( pBase->pMethods ){
680 rc = pBase->pMethods->xFileSize(pBase, &filesize);
681 if( rc!=SQLITE_OK ){
682 goto asyncread_out;
683 }
shanea3628d12009-04-29 18:11:59 +0000684 nRead = (int)MIN(filesize - iOffset, iAmt);
danielk1977a3f06592009-04-23 14:58:39 +0000685 if( nRead>0 ){
686 rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset);
687 ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
688 }
689 }
690
691 if( rc==SQLITE_OK ){
692 AsyncWrite *pWrite;
693 char *zName = p->zName;
694
695 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
696 if( pWrite->op==ASYNC_WRITE && (
697 (pWrite->pFileData==p) ||
698 (zName && pWrite->pFileData->zName==zName)
699 )){
shanea3628d12009-04-29 18:11:59 +0000700 sqlite3_int64 iBeginOut = (pWrite->iOffset-iOffset);
701 sqlite3_int64 iBeginIn = -iBeginOut;
danielk1977a3f06592009-04-23 14:58:39 +0000702 int nCopy;
703
704 if( iBeginIn<0 ) iBeginIn = 0;
705 if( iBeginOut<0 ) iBeginOut = 0;
706 nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut);
707
708 if( nCopy>0 ){
709 memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy);
710 ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
711 }
712 }
713 }
714 }
715
716asyncread_out:
717 async_mutex_leave(ASYNC_MUTEX_QUEUE);
718 return rc;
719}
720
721/*
722** Truncate the file to nByte bytes in length. This just adds an entry to
723** the write-op list, no IO actually takes place.
724*/
725static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){
726 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
727 return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
728}
729
730/*
731** Sync the file. This just adds an entry to the write-op list, the
732** sync() is done later by sqlite3_async_flush().
733*/
734static int asyncSync(sqlite3_file *pFile, int flags){
735 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
736 return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
737}
738
739/*
740** Read the size of the file. First we read the size of the file system
741** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
742** currently in the write-op list.
743**
744** This method holds the mutex from start to finish.
745*/
746int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
747 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
748 int rc = SQLITE_OK;
749 sqlite3_int64 s = 0;
750 sqlite3_file *pBase;
751
752 async_mutex_enter(ASYNC_MUTEX_QUEUE);
753
754 /* Read the filesystem size from the base file. If pBaseRead is NULL, this
755 ** means the file hasn't been opened yet. In this case all relevant data
756 ** must be in the write-op queue anyway, so we can omit reading from the
757 ** file-system.
758 */
759 pBase = p->pBaseRead;
760 if( pBase->pMethods ){
761 rc = pBase->pMethods->xFileSize(pBase, &s);
762 }
763
764 if( rc==SQLITE_OK ){
765 AsyncWrite *pWrite;
766 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
767 if( pWrite->op==ASYNC_DELETE
768 && p->zName
769 && strcmp(p->zName, pWrite->zBuf)==0
770 ){
771 s = 0;
772 }else if( pWrite->pFileData && (
773 (pWrite->pFileData==p)
774 || (p->zName && pWrite->pFileData->zName==p->zName)
775 )){
776 switch( pWrite->op ){
777 case ASYNC_WRITE:
778 s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s);
779 break;
780 case ASYNC_TRUNCATE:
781 s = MIN(s, pWrite->iOffset);
782 break;
783 }
784 }
785 }
786 *piSize = s;
787 }
788 async_mutex_leave(ASYNC_MUTEX_QUEUE);
789 return rc;
790}
791
792/*
793** Lock or unlock the actual file-system entry.
794*/
795static int getFileLock(AsyncLock *pLock){
796 int rc = SQLITE_OK;
797 AsyncFileLock *pIter;
798 int eRequired = 0;
799
800 if( pLock->pFile ){
801 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
802 assert(pIter->eAsyncLock>=pIter->eLock);
803 if( pIter->eAsyncLock>eRequired ){
804 eRequired = pIter->eAsyncLock;
805 assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
806 }
807 }
808
809 if( eRequired>pLock->eLock ){
810 rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired);
811 if( rc==SQLITE_OK ){
812 pLock->eLock = eRequired;
813 }
814 }
815 else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
816 rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired);
817 if( rc==SQLITE_OK ){
818 pLock->eLock = eRequired;
819 }
820 }
821 }
822
823 return rc;
824}
825
826/*
827** Return the AsyncLock structure from the global async.pLock list
828** associated with the file-system entry identified by path zName
829** (a string of nName bytes). If no such structure exists, return 0.
830*/
831static AsyncLock *findLock(const char *zName, int nName){
832 AsyncLock *p = async.pLock;
833 while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){
834 p = p->pNext;
835 }
836 return p;
837}
838
839/*
840** The following two methods - asyncLock() and asyncUnlock() - are used
841** to obtain and release locks on database files opened with the
842** asynchronous backend.
843*/
844static int asyncLock(sqlite3_file *pFile, int eLock){
845 int rc = SQLITE_OK;
846 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
847
848 if( p->zName ){
849 async_mutex_enter(ASYNC_MUTEX_LOCK);
850 if( p->lock.eLock<eLock ){
851 AsyncLock *pLock = p->pLock;
852 AsyncFileLock *pIter;
853 assert(pLock && pLock->pList);
854 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
855 if( pIter!=&p->lock && (
856 (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
857 (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
858 (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
859 (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
860 )){
861 rc = SQLITE_BUSY;
862 }
863 }
864 if( rc==SQLITE_OK ){
865 p->lock.eLock = eLock;
866 p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
867 }
868 assert(p->lock.eAsyncLock>=p->lock.eLock);
869 if( rc==SQLITE_OK ){
870 rc = getFileLock(pLock);
871 }
872 }
873 async_mutex_leave(ASYNC_MUTEX_LOCK);
874 }
875
876 ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
877 return rc;
878}
879static int asyncUnlock(sqlite3_file *pFile, int eLock){
880 int rc = SQLITE_OK;
881 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
882 if( p->zName ){
883 AsyncFileLock *pLock = &p->lock;
884 async_mutex_enter(ASYNC_MUTEX_QUEUE);
885 async_mutex_enter(ASYNC_MUTEX_LOCK);
886 pLock->eLock = MIN(pLock->eLock, eLock);
887 rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
888 async_mutex_leave(ASYNC_MUTEX_LOCK);
889 async_mutex_leave(ASYNC_MUTEX_QUEUE);
890 }
891 return rc;
892}
893
894/*
895** This function is called when the pager layer first opens a database file
896** and is checking for a hot-journal.
897*/
898static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
899 int ret = 0;
900 AsyncFileLock *pIter;
901 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
902
903 async_mutex_enter(ASYNC_MUTEX_LOCK);
904 for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){
905 if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
906 ret = 1;
shanea3628d12009-04-29 18:11:59 +0000907 break;
danielk1977a3f06592009-04-23 14:58:39 +0000908 }
909 }
910 async_mutex_leave(ASYNC_MUTEX_LOCK);
911
912 ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
913 *pResOut = ret;
914 return SQLITE_OK;
915}
916
917/*
918** sqlite3_file_control() implementation.
919*/
920static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
921 switch( op ){
922 case SQLITE_FCNTL_LOCKSTATE: {
923 async_mutex_enter(ASYNC_MUTEX_LOCK);
924 *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
925 async_mutex_leave(ASYNC_MUTEX_LOCK);
926 return SQLITE_OK;
927 }
928 }
929 return SQLITE_ERROR;
930}
931
932/*
933** Return the device characteristics and sector-size of the device. It
shanea3628d12009-04-29 18:11:59 +0000934** is tricky to implement these correctly, as this backend might
danielk1977a3f06592009-04-23 14:58:39 +0000935** not have an open file handle at this point.
936*/
937static int asyncSectorSize(sqlite3_file *pFile){
shanea3628d12009-04-29 18:11:59 +0000938 UNUSED_PARAMETER(pFile);
danielk1977a3f06592009-04-23 14:58:39 +0000939 return 512;
940}
941static int asyncDeviceCharacteristics(sqlite3_file *pFile){
shanea3628d12009-04-29 18:11:59 +0000942 UNUSED_PARAMETER(pFile);
danielk1977a3f06592009-04-23 14:58:39 +0000943 return 0;
944}
945
946static int unlinkAsyncFile(AsyncFileData *pData){
947 AsyncFileLock **ppIter;
948 int rc = SQLITE_OK;
949
950 if( pData->zName ){
951 AsyncLock *pLock = pData->pLock;
952 for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){
953 if( (*ppIter)==&pData->lock ){
954 *ppIter = pData->lock.pNext;
955 break;
956 }
957 }
958 if( !pLock->pList ){
959 AsyncLock **pp;
960 if( pLock->pFile ){
961 pLock->pFile->pMethods->xClose(pLock->pFile);
962 }
963 for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext));
964 *pp = pLock->pNext;
965 sqlite3_free(pLock);
966 }else{
967 rc = getFileLock(pLock);
968 }
969 }
970
971 return rc;
972}
973
974/*
975** The parameter passed to this function is a copy of a 'flags' parameter
976** passed to this modules xOpen() method. This function returns true
977** if the file should be opened asynchronously, or false if it should
978** be opened immediately.
979**
980** If the file is to be opened asynchronously, then asyncOpen() will add
981** an entry to the event queue and the file will not actually be opened
982** until the event is processed. Otherwise, the file is opened directly
983** by the caller.
984*/
985static int doAsynchronousOpen(int flags){
986 return (flags&SQLITE_OPEN_CREATE) && (
987 (flags&SQLITE_OPEN_MAIN_JOURNAL) ||
988 (flags&SQLITE_OPEN_TEMP_JOURNAL) ||
989 (flags&SQLITE_OPEN_DELETEONCLOSE)
990 );
991}
992
993/*
994** Open a file.
995*/
996static int asyncOpen(
997 sqlite3_vfs *pAsyncVfs,
998 const char *zName,
999 sqlite3_file *pFile,
1000 int flags,
1001 int *pOutFlags
1002){
1003 static sqlite3_io_methods async_methods = {
1004 1, /* iVersion */
1005 asyncClose, /* xClose */
1006 asyncRead, /* xRead */
1007 asyncWrite, /* xWrite */
1008 asyncTruncate, /* xTruncate */
1009 asyncSync, /* xSync */
1010 asyncFileSize, /* xFileSize */
1011 asyncLock, /* xLock */
1012 asyncUnlock, /* xUnlock */
1013 asyncCheckReservedLock, /* xCheckReservedLock */
1014 asyncFileControl, /* xFileControl */
1015 asyncSectorSize, /* xSectorSize */
1016 asyncDeviceCharacteristics /* xDeviceCharacteristics */
1017 };
1018
1019 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1020 AsyncFile *p = (AsyncFile *)pFile;
1021 int nName = 0;
1022 int rc = SQLITE_OK;
1023 int nByte;
1024 AsyncFileData *pData;
1025 AsyncLock *pLock = 0;
1026 char *z;
1027 int isAsyncOpen = doAsynchronousOpen(flags);
1028
1029 /* If zName is NULL, then the upper layer is requesting an anonymous file */
1030 if( zName ){
shanea3628d12009-04-29 18:11:59 +00001031 nName = (int)strlen(zName)+1;
danielk1977a3f06592009-04-23 14:58:39 +00001032 }
1033
1034 nByte = (
1035 sizeof(AsyncFileData) + /* AsyncFileData structure */
1036 2 * pVfs->szOsFile + /* AsyncFileData.pBaseRead and pBaseWrite */
1037 nName /* AsyncFileData.zName */
1038 );
1039 z = sqlite3_malloc(nByte);
1040 if( !z ){
1041 return SQLITE_NOMEM;
1042 }
1043 memset(z, 0, nByte);
1044 pData = (AsyncFileData*)z;
1045 z += sizeof(pData[0]);
1046 pData->pBaseRead = (sqlite3_file*)z;
1047 z += pVfs->szOsFile;
1048 pData->pBaseWrite = (sqlite3_file*)z;
1049 pData->closeOp.pFileData = pData;
1050 pData->closeOp.op = ASYNC_CLOSE;
1051
1052 if( zName ){
1053 z += pVfs->szOsFile;
1054 pData->zName = z;
1055 pData->nName = nName;
1056 memcpy(pData->zName, zName, nName);
1057 }
1058
1059 if( !isAsyncOpen ){
1060 int flagsout;
1061 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, &flagsout);
1062 if( rc==SQLITE_OK && (flagsout&SQLITE_OPEN_READWRITE) ){
1063 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseWrite, flags, 0);
1064 }
1065 if( pOutFlags ){
1066 *pOutFlags = flagsout;
1067 }
1068 }
1069
1070 async_mutex_enter(ASYNC_MUTEX_LOCK);
1071
1072 if( zName && rc==SQLITE_OK ){
1073 pLock = findLock(pData->zName, pData->nName);
1074 if( !pLock ){
1075 int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1;
1076 pLock = (AsyncLock *)sqlite3_malloc(nByte);
1077 if( pLock ){
1078 memset(pLock, 0, nByte);
danielk19774598b8e2009-04-24 10:13:05 +00001079 if( async.bLockFiles && (flags&SQLITE_OPEN_MAIN_DB) ){
danielk1977a3f06592009-04-23 14:58:39 +00001080 pLock->pFile = (sqlite3_file *)&pLock[1];
1081 rc = pVfs->xOpen(pVfs, pData->zName, pLock->pFile, flags, 0);
1082 if( rc!=SQLITE_OK ){
1083 sqlite3_free(pLock);
1084 pLock = 0;
1085 }
1086 }
danielk1977a3f06592009-04-23 14:58:39 +00001087 if( pLock ){
1088 pLock->nFile = pData->nName;
1089 pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile];
1090 memcpy(pLock->zFile, pData->zName, pLock->nFile);
1091 pLock->pNext = async.pLock;
1092 async.pLock = pLock;
1093 }
1094 }else{
1095 rc = SQLITE_NOMEM;
1096 }
1097 }
1098 }
1099
1100 if( rc==SQLITE_OK ){
1101 p->pMethod = &async_methods;
1102 p->pData = pData;
1103
1104 /* Link AsyncFileData.lock into the linked list of
1105 ** AsyncFileLock structures for this file.
1106 */
1107 if( zName ){
1108 pData->lock.pNext = pLock->pList;
1109 pLock->pList = &pData->lock;
1110 pData->zName = pLock->zFile;
1111 }
1112 }else{
1113 if( pData->pBaseRead->pMethods ){
1114 pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1115 }
1116 if( pData->pBaseWrite->pMethods ){
1117 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1118 }
1119 sqlite3_free(pData);
1120 }
1121
1122 async_mutex_leave(ASYNC_MUTEX_LOCK);
1123
1124 if( rc==SQLITE_OK ){
1125 incrOpenFileCount();
1126 pData->pLock = pLock;
1127 }
1128
1129 if( rc==SQLITE_OK && isAsyncOpen ){
1130 rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0);
1131 if( rc==SQLITE_OK ){
1132 if( pOutFlags ) *pOutFlags = flags;
1133 }else{
1134 async_mutex_enter(ASYNC_MUTEX_LOCK);
1135 unlinkAsyncFile(pData);
1136 async_mutex_leave(ASYNC_MUTEX_LOCK);
1137 sqlite3_free(pData);
1138 }
1139 }
1140 if( rc!=SQLITE_OK ){
1141 p->pMethod = 0;
1142 }
1143 return rc;
1144}
1145
1146/*
1147** Implementation of sqlite3OsDelete. Add an entry to the end of the
1148** write-op queue to perform the delete.
1149*/
1150static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
shanea3628d12009-04-29 18:11:59 +00001151 UNUSED_PARAMETER(pAsyncVfs);
1152 return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, (int)strlen(z)+1, z);
danielk1977a3f06592009-04-23 14:58:39 +00001153}
1154
1155/*
1156** Implementation of sqlite3OsAccess. This method holds the mutex from
1157** start to finish.
1158*/
1159static int asyncAccess(
1160 sqlite3_vfs *pAsyncVfs,
1161 const char *zName,
1162 int flags,
1163 int *pResOut
1164){
1165 int rc;
1166 int ret;
1167 AsyncWrite *p;
1168 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1169
1170 assert(flags==SQLITE_ACCESS_READWRITE
1171 || flags==SQLITE_ACCESS_READ
1172 || flags==SQLITE_ACCESS_EXISTS
1173 );
1174
1175 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1176 rc = pVfs->xAccess(pVfs, zName, flags, &ret);
1177 if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){
1178 for(p=async.pQueueFirst; p; p = p->pNext){
1179 if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
1180 ret = 0;
1181 }else if( p->op==ASYNC_OPENEXCLUSIVE
1182 && p->pFileData->zName
1183 && 0==strcmp(p->pFileData->zName, zName)
1184 ){
1185 ret = 1;
1186 }
1187 }
1188 }
1189 ASYNC_TRACE(("ACCESS(%s): %s = %d\n",
1190 flags==SQLITE_ACCESS_READWRITE?"read-write":
1191 flags==SQLITE_ACCESS_READ?"read":"exists"
1192 , zName, ret)
1193 );
1194 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1195 *pResOut = ret;
1196 return rc;
1197}
1198
1199/*
1200** Fill in zPathOut with the full path to the file identified by zPath.
1201*/
1202static int asyncFullPathname(
1203 sqlite3_vfs *pAsyncVfs,
1204 const char *zPath,
1205 int nPathOut,
1206 char *zPathOut
1207){
1208 int rc;
1209 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1210 rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
1211
1212 /* Because of the way intra-process file locking works, this backend
1213 ** needs to return a canonical path. The following block assumes the
1214 ** file-system uses unix style paths.
1215 */
1216 if( rc==SQLITE_OK ){
1217 int i, j;
1218 int n = nPathOut;
1219 char *z = zPathOut;
1220 while( n>1 && z[n-1]=='/' ){ n--; }
1221 for(i=j=0; i<n; i++){
1222 if( z[i]=='/' ){
1223 if( z[i+1]=='/' ) continue;
1224 if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
1225 i += 1;
1226 continue;
1227 }
1228 if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
1229 while( j>0 && z[j-1]!='/' ){ j--; }
1230 if( j>0 ){ j--; }
1231 i += 2;
1232 continue;
1233 }
1234 }
1235 z[j++] = z[i];
1236 }
1237 z[j] = 0;
1238 }
1239
1240 return rc;
1241}
1242static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
1243 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1244 return pVfs->xDlOpen(pVfs, zPath);
1245}
1246static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
1247 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1248 pVfs->xDlError(pVfs, nByte, zErrMsg);
1249}
1250static void (*asyncDlSym(
1251 sqlite3_vfs *pAsyncVfs,
1252 void *pHandle,
1253 const char *zSymbol
1254))(void){
1255 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1256 return pVfs->xDlSym(pVfs, pHandle, zSymbol);
1257}
1258static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
1259 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1260 pVfs->xDlClose(pVfs, pHandle);
1261}
1262static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
1263 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1264 return pVfs->xRandomness(pVfs, nByte, zBufOut);
1265}
1266static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
1267 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1268 return pVfs->xSleep(pVfs, nMicro);
1269}
1270static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
1271 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1272 return pVfs->xCurrentTime(pVfs, pTimeOut);
1273}
1274
1275static sqlite3_vfs async_vfs = {
1276 1, /* iVersion */
1277 sizeof(AsyncFile), /* szOsFile */
1278 0, /* mxPathname */
1279 0, /* pNext */
1280 SQLITEASYNC_VFSNAME, /* zName */
1281 0, /* pAppData */
1282 asyncOpen, /* xOpen */
1283 asyncDelete, /* xDelete */
1284 asyncAccess, /* xAccess */
1285 asyncFullPathname, /* xFullPathname */
1286 asyncDlOpen, /* xDlOpen */
1287 asyncDlError, /* xDlError */
1288 asyncDlSym, /* xDlSym */
1289 asyncDlClose, /* xDlClose */
1290 asyncRandomness, /* xDlError */
1291 asyncSleep, /* xDlSym */
1292 asyncCurrentTime /* xDlClose */
1293};
1294
1295/*
1296** This procedure runs in a separate thread, reading messages off of the
1297** write queue and processing them one by one.
1298**
1299** If async.writerHaltNow is true, then this procedure exits
1300** after processing a single message.
1301**
1302** If async.writerHaltWhenIdle is true, then this procedure exits when
1303** the write queue is empty.
1304**
1305** If both of the above variables are false, this procedure runs
1306** indefinately, waiting for operations to be added to the write queue
1307** and processing them in the order in which they arrive.
1308**
1309** An artifical delay of async.ioDelay milliseconds is inserted before
1310** each write operation in order to simulate the effect of a slow disk.
1311**
1312** Only one instance of this procedure may be running at a time.
1313*/
1314static void asyncWriterThread(void){
1315 sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
1316 AsyncWrite *p = 0;
1317 int rc = SQLITE_OK;
1318 int holdingMutex = 0;
1319
1320 async_mutex_enter(ASYNC_MUTEX_WRITER);
1321
1322 while( async.eHalt!=SQLITEASYNC_HALT_NOW ){
1323 int doNotFree = 0;
1324 sqlite3_file *pBase = 0;
1325
1326 if( !holdingMutex ){
1327 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1328 }
1329 while( (p = async.pQueueFirst)==0 ){
1330 if( async.eHalt!=SQLITEASYNC_HALT_NEVER ){
1331 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1332 break;
1333 }else{
1334 ASYNC_TRACE(("IDLE\n"));
1335 async_cond_wait(ASYNC_COND_QUEUE, ASYNC_MUTEX_QUEUE);
1336 ASYNC_TRACE(("WAKEUP\n"));
1337 }
1338 }
1339 if( p==0 ) break;
1340 holdingMutex = 1;
1341
1342 /* Right now this thread is holding the mutex on the write-op queue.
1343 ** Variable 'p' points to the first entry in the write-op queue. In
1344 ** the general case, we hold on to the mutex for the entire body of
1345 ** the loop.
1346 **
1347 ** However in the cases enumerated below, we relinquish the mutex,
1348 ** perform the IO, and then re-request the mutex before removing 'p' from
1349 ** the head of the write-op queue. The idea is to increase concurrency with
1350 ** sqlite threads.
1351 **
1352 ** * An ASYNC_CLOSE operation.
1353 ** * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish
1354 ** the mutex, call the underlying xOpenExclusive() function, then
1355 ** re-aquire the mutex before seting the AsyncFile.pBaseRead
1356 ** variable.
1357 ** * ASYNC_SYNC and ASYNC_WRITE operations, if
1358 ** SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
1359 ** file-handles are open for the particular file being "synced".
1360 */
1361 if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
1362 p->op = ASYNC_NOOP;
1363 }
1364 if( p->pFileData ){
1365 pBase = p->pFileData->pBaseWrite;
1366 if(
1367 p->op==ASYNC_CLOSE ||
1368 p->op==ASYNC_OPENEXCLUSIVE ||
1369 (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) )
1370 ){
1371 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1372 holdingMutex = 0;
1373 }
1374 if( !pBase->pMethods ){
1375 pBase = p->pFileData->pBaseRead;
1376 }
1377 }
1378
1379 switch( p->op ){
1380 case ASYNC_NOOP:
1381 break;
1382
1383 case ASYNC_WRITE:
1384 assert( pBase );
1385 ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
1386 p->pFileData->zName, p->nByte, p->iOffset));
1387 rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
1388 break;
1389
1390 case ASYNC_SYNC:
1391 assert( pBase );
1392 ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
1393 rc = pBase->pMethods->xSync(pBase, p->nByte);
1394 break;
1395
1396 case ASYNC_TRUNCATE:
1397 assert( pBase );
1398 ASYNC_TRACE(("TRUNCATE %s to %d bytes\n",
1399 p->pFileData->zName, p->iOffset));
1400 rc = pBase->pMethods->xTruncate(pBase, p->iOffset);
1401 break;
1402
1403 case ASYNC_CLOSE: {
1404 AsyncFileData *pData = p->pFileData;
1405 ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
1406 if( pData->pBaseWrite->pMethods ){
1407 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1408 }
1409 if( pData->pBaseRead->pMethods ){
1410 pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1411 }
1412
1413 /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock
1414 ** structures for this file. Obtain the async.lockMutex mutex
1415 ** before doing so.
1416 */
1417 async_mutex_enter(ASYNC_MUTEX_LOCK);
1418 rc = unlinkAsyncFile(pData);
1419 async_mutex_leave(ASYNC_MUTEX_LOCK);
1420
1421 if( !holdingMutex ){
1422 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1423 holdingMutex = 1;
1424 }
1425 assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1426 async.pQueueFirst = p->pNext;
1427 sqlite3_free(pData);
1428 doNotFree = 1;
1429 break;
1430 }
1431
1432 case ASYNC_UNLOCK: {
1433 AsyncWrite *pIter;
1434 AsyncFileData *pData = p->pFileData;
1435 int eLock = p->nByte;
1436
1437 /* When a file is locked by SQLite using the async backend, it is
1438 ** locked within the 'real' file-system synchronously. When it is
1439 ** unlocked, an ASYNC_UNLOCK event is added to the write-queue to
1440 ** unlock the file asynchronously. The design of the async backend
1441 ** requires that the 'real' file-system file be locked from the
1442 ** time that SQLite first locks it (and probably reads from it)
1443 ** until all asynchronous write events that were scheduled before
1444 ** SQLite unlocked the file have been processed.
1445 **
1446 ** This is more complex if SQLite locks and unlocks the file multiple
1447 ** times in quick succession. For example, if SQLite does:
1448 **
1449 ** lock, write, unlock, lock, write, unlock
1450 **
1451 ** Each "lock" operation locks the file immediately. Each "write"
1452 ** and "unlock" operation adds an event to the event queue. If the
1453 ** second "lock" operation is performed before the first "unlock"
1454 ** operation has been processed asynchronously, then the first
1455 ** "unlock" cannot be safely processed as is, since this would mean
1456 ** the file was unlocked when the second "write" operation is
1457 ** processed. To work around this, when processing an ASYNC_UNLOCK
1458 ** operation, SQLite:
1459 **
1460 ** 1) Unlocks the file to the minimum of the argument passed to
1461 ** the xUnlock() call and the current lock from SQLite's point
1462 ** of view, and
1463 **
1464 ** 2) Only unlocks the file at all if this event is the last
1465 ** ASYNC_UNLOCK event on this file in the write-queue.
1466 */
1467 assert( holdingMutex==1 );
1468 assert( async.pQueueFirst==p );
1469 for(pIter=async.pQueueFirst->pNext; pIter; pIter=pIter->pNext){
1470 if( pIter->pFileData==pData && pIter->op==ASYNC_UNLOCK ) break;
1471 }
1472 if( !pIter ){
1473 async_mutex_enter(ASYNC_MUTEX_LOCK);
1474 pData->lock.eAsyncLock = MIN(
1475 pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
1476 );
1477 assert(pData->lock.eAsyncLock>=pData->lock.eLock);
1478 rc = getFileLock(pData->pLock);
1479 async_mutex_leave(ASYNC_MUTEX_LOCK);
1480 }
1481 break;
1482 }
1483
1484 case ASYNC_DELETE:
1485 ASYNC_TRACE(("DELETE %s\n", p->zBuf));
1486 rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset);
1487 break;
1488
1489 case ASYNC_OPENEXCLUSIVE: {
1490 int flags = (int)p->iOffset;
1491 AsyncFileData *pData = p->pFileData;
1492 ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
1493 assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
1494 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
1495 assert( holdingMutex==0 );
1496 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1497 holdingMutex = 1;
1498 break;
1499 }
1500
1501 default: assert(!"Illegal value for AsyncWrite.op");
1502 }
1503
1504 /* If we didn't hang on to the mutex during the IO op, obtain it now
1505 ** so that the AsyncWrite structure can be safely removed from the
1506 ** global write-op queue.
1507 */
1508 if( !holdingMutex ){
1509 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1510 holdingMutex = 1;
1511 }
1512 /* ASYNC_TRACE(("UNLINK %p\n", p)); */
1513 if( p==async.pQueueLast ){
1514 async.pQueueLast = 0;
1515 }
1516 if( !doNotFree ){
1517 assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1518 async.pQueueFirst = p->pNext;
1519 sqlite3_free(p);
1520 }
1521 assert( holdingMutex );
1522
1523 /* An IO error has occurred. We cannot report the error back to the
1524 ** connection that requested the I/O since the error happened
1525 ** asynchronously. The connection has already moved on. There
1526 ** really is nobody to report the error to.
1527 **
1528 ** The file for which the error occurred may have been a database or
1529 ** journal file. Regardless, none of the currently queued operations
1530 ** associated with the same database should now be performed. Nor should
1531 ** any subsequently requested IO on either a database or journal file
1532 ** handle for the same database be accepted until the main database
1533 ** file handle has been closed and reopened.
1534 **
1535 ** Furthermore, no further IO should be queued or performed on any file
1536 ** handle associated with a database that may have been part of a
1537 ** multi-file transaction that included the database associated with
1538 ** the IO error (i.e. a database ATTACHed to the same handle at some
1539 ** point in time).
1540 */
1541 if( rc!=SQLITE_OK ){
1542 async.ioError = rc;
1543 }
1544
1545 if( async.ioError && !async.pQueueFirst ){
1546 async_mutex_enter(ASYNC_MUTEX_LOCK);
1547 if( 0==async.pLock ){
1548 async.ioError = SQLITE_OK;
1549 }
1550 async_mutex_leave(ASYNC_MUTEX_LOCK);
1551 }
1552
1553 /* Drop the queue mutex before continuing to the next write operation
1554 ** in order to give other threads a chance to work with the write queue.
1555 */
1556 if( !async.pQueueFirst || !async.ioError ){
1557 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1558 holdingMutex = 0;
1559 if( async.ioDelay>0 ){
danielk19776f050aa2009-04-25 08:39:14 +00001560 pVfs->xSleep(pVfs, async.ioDelay*1000);
danielk1977a3f06592009-04-23 14:58:39 +00001561 }else{
1562 async_sched_yield();
1563 }
1564 }
1565 }
1566
1567 async_mutex_leave(ASYNC_MUTEX_WRITER);
1568 return;
1569}
1570
1571/*
1572** Install the asynchronous VFS.
1573*/
1574int sqlite3async_initialize(const char *zParent, int isDefault){
1575 int rc = SQLITE_OK;
1576 if( async_vfs.pAppData==0 ){
1577 sqlite3_vfs *pParent = sqlite3_vfs_find(zParent);
1578 if( !pParent || async_os_initialize() ){
1579 rc = SQLITE_ERROR;
1580 }else if( SQLITE_OK!=(rc = sqlite3_vfs_register(&async_vfs, isDefault)) ){
1581 async_os_shutdown();
1582 }else{
1583 async_vfs.pAppData = (void *)pParent;
1584 async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
1585 }
1586 }
1587 return rc;
1588}
1589
1590/*
1591** Uninstall the asynchronous VFS.
1592*/
1593void sqlite3async_shutdown(void){
1594 if( async_vfs.pAppData ){
1595 async_os_shutdown();
1596 sqlite3_vfs_unregister((sqlite3_vfs *)&async_vfs);
1597 async_vfs.pAppData = 0;
1598 }
1599}
1600
/*
** Process events on the write-queue. This is a thin public wrapper: it
** runs asyncWriterThread() in the calling thread and returns when that
** function returns.
*/
void sqlite3async_run(void){
  asyncWriterThread();
  return;
}
1607
1608/*
1609** Control/configure the asynchronous IO system.
1610*/
1611int sqlite3async_control(int op, ...){
1612 va_list ap;
1613 va_start(ap, op);
1614 switch( op ){
1615 case SQLITEASYNC_HALT: {
1616 int eWhen = va_arg(ap, int);
1617 if( eWhen!=SQLITEASYNC_HALT_NEVER
1618 && eWhen!=SQLITEASYNC_HALT_NOW
1619 && eWhen!=SQLITEASYNC_HALT_IDLE
1620 ){
danielk19774598b8e2009-04-24 10:13:05 +00001621 return SQLITE_MISUSE;
danielk1977a3f06592009-04-23 14:58:39 +00001622 }
1623 async.eHalt = eWhen;
1624 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1625 async_cond_signal(ASYNC_COND_QUEUE);
1626 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1627 break;
1628 }
1629
1630 case SQLITEASYNC_DELAY: {
1631 int iDelay = va_arg(ap, int);
danielk19774598b8e2009-04-24 10:13:05 +00001632 if( iDelay<0 ){
1633 return SQLITE_MISUSE;
1634 }
danielk1977a3f06592009-04-23 14:58:39 +00001635 async.ioDelay = iDelay;
1636 break;
1637 }
danielk19774598b8e2009-04-24 10:13:05 +00001638
1639 case SQLITEASYNC_LOCKFILES: {
1640 int bLock = va_arg(ap, int);
1641 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1642 if( async.nFile || async.pQueueFirst ){
1643 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1644 return SQLITE_MISUSE;
1645 }
1646 async.bLockFiles = bLock;
1647 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1648 break;
1649 }
danielk1977a3f06592009-04-23 14:58:39 +00001650
1651 case SQLITEASYNC_GET_HALT: {
1652 int *peWhen = va_arg(ap, int *);
1653 *peWhen = async.eHalt;
1654 break;
1655 }
1656 case SQLITEASYNC_GET_DELAY: {
1657 int *piDelay = va_arg(ap, int *);
1658 *piDelay = async.ioDelay;
1659 break;
1660 }
danielk19774598b8e2009-04-24 10:13:05 +00001661 case SQLITEASYNC_GET_LOCKFILES: {
1662 int *piDelay = va_arg(ap, int *);
1663 *piDelay = async.bLockFiles;
1664 break;
1665 }
danielk1977a3f06592009-04-23 14:58:39 +00001666
1667 default:
1668 return SQLITE_ERROR;
1669 }
1670 return SQLITE_OK;
1671}
1672
1673#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */
1674