/*
2** 2005 December 14
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12**
** $Id: sqlite3async.c,v 1.2 2009/04/24 09:27:16 danielk1977 Exp $
**
** This file contains the implementation of an asynchronous IO backend
** for SQLite.
*/
18
19#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO)
20
21#include "sqlite3async.h"
22
23#define ENABLE_FILE_LOCKING
24
/* Useful macros used in several places */
26#define MIN(x,y) ((x)<(y)?(x):(y))
27#define MAX(x,y) ((x)>(y)?(x):(y))
28
29/* Forward references */
30typedef struct AsyncWrite AsyncWrite;
31typedef struct AsyncFile AsyncFile;
32typedef struct AsyncFileData AsyncFileData;
33typedef struct AsyncFileLock AsyncFileLock;
34typedef struct AsyncLock AsyncLock;
35
/* Enable for debugging: set sqlite3async_trace to non-zero to have every
** ASYNC_TRACE(()) invocation print its message to stderr.
**
** ASYNC_TRACE takes a parenthesized printf-style argument list, for
** example ASYNC_TRACE(("PUSH %p\n", p)). The do{}while(0) wrapper makes
** the macro a single statement, so it cannot capture the "else" of an
** enclosing un-braced if statement.
*/
static int sqlite3async_trace = 0;
# define ASYNC_TRACE(X) do{ if( sqlite3async_trace ) asyncTrace X; }while(0)
static void asyncTrace(const char *zFormat, ...){
  char *z;
  va_list ap;
  va_start(ap, zFormat);
  z = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
  /* sqlite3_vmprintf() returns NULL if the message cannot be allocated.
  ** Handing NULL to "%s" is undefined behavior, so drop the message. */
  if( z ){
    fprintf(stderr, "[%d] %s", 0 /* (int)pthread_self() */, z);
    sqlite3_free(z);
  }
}
48
49/*
50** THREAD SAFETY NOTES
51**
52** Basic rules:
53**
54** * Both read and write access to the global write-op queue must be
55** protected by the async.queueMutex. As are the async.ioError and
56** async.nFile variables.
57**
58** * The async.pLock list and all AsyncLock and AsyncFileLock
59** structures must be protected by the async.lockMutex mutex.
60**
61** * The file handles from the underlying system are not assumed to
62** be thread safe.
63**
64** * See the last two paragraphs under "The Writer Thread" for
65** an assumption to do with file-handle synchronization by the Os.
66**
67** Deadlock prevention:
68**
** There are three mutexes used by the system: the "writer" mutex,
** the "queue" mutex and the "lock" mutex. Rules are:
**
** * It is illegal to block on the writer mutex when any other mutex
** is held, and
**
** * It is illegal to block on the queue mutex when the lock mutex
** is held.
**
** i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
79**
80** File system operations (invoked by SQLite thread):
81**
82** xOpen
83** xDelete
84** xFileExists
85**
86** File handle operations (invoked by SQLite thread):
87**
88** asyncWrite, asyncClose, asyncTruncate, asyncSync
89**
90** The operations above add an entry to the global write-op list. They
91** prepare the entry, acquire the async.queueMutex momentarily while
92** list pointers are manipulated to insert the new entry, then release
93** the mutex and signal the writer thread to wake up in case it happens
94** to be asleep.
95**
96**
97** asyncRead, asyncFileSize.
98**
** Read operations. Both of these read from the underlying file
** first, then adjust their result based on pending writes in the
101** write-op queue. So async.queueMutex is held for the duration
102** of these operations to prevent other threads from changing the
103** queue in mid operation.
104**
105**
106** asyncLock, asyncUnlock, asyncCheckReservedLock
107**
108** These primitives implement in-process locking using a hash table
109** on the file name. Files are locked correctly for connections coming
110** from the same process. But other processes cannot see these locks
111** and will therefore not honor them.
112**
113**
114** The writer thread:
115**
** The async.writerMutex is used to make sure there is only
** a single writer thread running at a time.
118**
119** Inside the writer thread is a loop that works like this:
120**
121** WHILE (write-op list is not empty)
122** Do IO operation at head of write-op list
123** Remove entry from head of write-op list
124** END WHILE
125**
126** The async.queueMutex is always held during the <write-op list is
127** not empty> test, and when the entry is removed from the head
128** of the write-op list. Sometimes it is held for the interim
129** period (while the IO is performed), and sometimes it is
130** relinquished. It is relinquished if (a) the IO op is an
131** ASYNC_CLOSE or (b) when the file handle was opened, two of
132** the underlying systems handles were opened on the same
133** file-system entry.
134**
135** If condition (b) above is true, then one file-handle
136** (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
137** file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
138** threads to perform write() operations. This means that read
139** operations are not blocked by asynchronous writes (although
140** asynchronous writes may still be blocked by reads).
141**
142** This assumes that the OS keeps two handles open on the same file
143** properly in sync. That is, any read operation that starts after a
144** write operation on the same file system entry has completed returns
145** data consistent with the write. We also assume that if one thread
146** reads a file while another is writing it all bytes other than the
147** ones actually being written contain valid data.
148**
149** If the above assumptions are not true, set the preprocessor symbol
150** SQLITE_ASYNC_TWO_FILEHANDLES to 0.
151*/
152
153
154#ifndef NDEBUG
155# define TESTONLY( X ) X
156#else
157# define TESTONLY( X )
158#endif
159
160/*
** PORTING FUNCTIONS
**
** There are two definitions of the following functions. One for pthreads
164** compatible systems and one for Win32. These functions isolate the OS
165** specific code required by each platform.
166**
167** The system uses three mutexes and a single condition variable. To
168** block on a mutex, async_mutex_enter() is called. The parameter passed
169** to async_mutex_enter(), which must be one of ASYNC_MUTEX_LOCK,
170** ASYNC_MUTEX_QUEUE or ASYNC_MUTEX_WRITER, identifies which of the three
171** mutexes to lock. Similarly, to unlock a mutex, async_mutex_leave() is
172** called with a parameter identifying the mutex being unlocked. Mutexes
173** are not recursive - it is an error to call async_mutex_enter() to
174** lock a mutex that is already locked, or to call async_mutex_leave()
175** to unlock a mutex that is not currently locked.
176**
177** The async_cond_wait() and async_cond_signal() functions are modelled
178** on the pthreads functions with similar names. The first parameter to
179** both functions is always ASYNC_COND_QUEUE. When async_cond_wait()
180** is called the mutex identified by the second parameter must be held.
181** The mutex is unlocked, and the calling thread simultaneously begins
182** waiting for the condition variable to be signalled by another thread.
183** After another thread signals the condition variable, the calling
184** thread stops waiting, locks mutex eMutex and returns. The
185** async_cond_signal() function is used to signal the condition variable.
186** It is assumed that the mutex used by the thread calling async_cond_wait()
187** is held by the caller of async_cond_signal() (otherwise there would be
188** a race condition).
189**
190** It is guaranteed that no other thread will call async_cond_wait() when
191** there is already a thread waiting on the condition variable.
192**
193** The async_sched_yield() function is called to suggest to the operating
194** system that it would be a good time to shift the current thread off the
195** CPU. The system will still work if this function is not implemented
196** (it is not currently implemented for win32), but it might be marginally
197** more efficient if it is.
198*/
199static void async_mutex_enter(int eMutex);
200static void async_mutex_leave(int eMutex);
201static void async_cond_wait(int eCond, int eMutex);
202static void async_cond_signal(int eCond);
203static void async_sched_yield(void);
204
205/*
** There are also two definitions of the following. async_os_initialize()
** is called when the asynchronous VFS is first installed, and
** async_os_shutdown() is called when it is uninstalled (from within
** sqlite3async_shutdown()).
209**
210** For pthreads builds, both of these functions are no-ops. For win32,
211** they provide an opportunity to initialize and finalize the required
212** mutex and condition variables.
213**
214** If async_os_initialize() returns other than zero, then the initialization
215** fails and SQLITE_ERROR is returned to the user.
216*/
217static int async_os_initialize(void);
218static void async_os_shutdown(void);
219
220/* Values for use as the 'eMutex' argument of the above functions. The
221** integer values assigned to these constants are important for assert()
222** statements that verify that mutexes are locked in the correct order.
223** Specifically, it is unsafe to try to lock mutex N while holding a lock
224** on mutex M if (M<=N).
225*/
226#define ASYNC_MUTEX_LOCK 0
227#define ASYNC_MUTEX_QUEUE 1
228#define ASYNC_MUTEX_WRITER 2
229
230/* Values for use as the 'eCond' argument of the above functions. */
231#define ASYNC_COND_QUEUE 0
232
233/*************************************************************************
234** Start of OS specific code.
235*/
236#if SQLITE_OS_WIN || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__)
237
238/* The following block contains the win32 specific code. */
239
240#define mutex_held(X) (GetCurrentThreadId()==primitives.aHolder[X])
241
242static struct AsyncPrimitives {
243 int isInit;
244 DWORD aHolder[3];
245 CRITICAL_SECTION aMutex[3];
246 HANDLE aCond[1];
247} primitives = { 0 };
248
249static int async_os_initialize(void){
250 if( !primitives.isInit ){
251 primitives.aCond[0] = CreateEvent(NULL, TRUE, FALSE, 0);
252 if( primitives.aCond[0]==NULL ){
253 return 1;
254 }
255 InitializeCriticalSection(&primitives.aMutex[0]);
256 InitializeCriticalSection(&primitives.aMutex[1]);
257 InitializeCriticalSection(&primitives.aMutex[2]);
258 primitives.isInit = 1;
259 }
260 return 0;
261}
262static void async_os_shutdown(void){
263 if( primitives.isInit ){
264 DeleteCriticalSection(&primitives.aMutex[0]);
265 DeleteCriticalSection(&primitives.aMutex[1]);
266 DeleteCriticalSection(&primitives.aMutex[2]);
267 CloseHandle(primitives.aCond[0]);
268 primitives.isInit = 0;
269 }
270}
271
272/* The following block contains the Win32 specific code. */
273static void async_mutex_enter(int eMutex){
274 assert( eMutex==0 || eMutex==1 || eMutex==2 );
275 assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
276 assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
277 assert( eMutex!=0 || (!mutex_held(0)) );
278 EnterCriticalSection(&primitives.aMutex[eMutex]);
279 TESTONLY( primitives.aHolder[eMutex] = GetCurrentThreadId(); )
280}
281static void async_mutex_leave(int eMutex){
282 assert( eMutex==0 || eMutex==1 || eMutex==2 );
283 assert( mutex_held(eMutex) );
284 TESTONLY( primitives.aHolder[eMutex] = 0; )
285 LeaveCriticalSection(&primitives.aMutex[eMutex]);
286}
287static void async_cond_wait(int eCond, int eMutex){
288 ResetEvent(primitives.aCond[eCond]);
289 async_mutex_leave(eMutex);
290 WaitForSingleObject(primitives.aCond[eCond], INFINITE);
291 async_mutex_enter(eMutex);
292}
293static void async_cond_signal(int eCond){
294 assert( mutex_held(ASYNC_MUTEX_QUEUE) );
295 SetEvent(primitives.aCond[eCond]);
296}
/*
** Win32 implementation: suggest to the operating system that this would
** be a good time to run some other thread. Sleep(0) gives up the rest of
** the calling thread's time slice to any other thread that is ready to
** run, and returns immediately if there is none.
*/
static void async_sched_yield(void){
  Sleep(0);
}
300#else
301
302/* The following block contains the pthreads specific code. */
303#include <pthread.h>
304#include <sched.h>
305
306#define mutex_held(X) pthread_equal(primitives.aHolder[X], pthread_self())
307
/* The pthreads primitives are statically initialized, so there is nothing
** to set up at run-time. Always returns 0 (success). */
static int async_os_initialize(void){
  return 0;
}
/* Nothing to tear down for the pthreads build. */
static void async_os_shutdown(void){
  return;
}
310
/* pthreads synchronization state. aMutex[] and aHolder[] are indexed by
** the ASYNC_MUTEX_XXX values, aCond[] by the ASYNC_COND_XXX values. */
static struct AsyncPrimitives {
  pthread_mutex_t aMutex[3];    /* The three mutexes */
  pthread_cond_t aCond[1];      /* The single condition variable */
  pthread_t aHolder[3];         /* Thread holding each mutex. Only
                                ** maintained when NDEBUG is not defined */
} primitives = {
  {
    PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER
  },
  { PTHREAD_COND_INITIALIZER },
  { 0, 0, 0 }
};
323
324static void async_mutex_enter(int eMutex){
325 assert( eMutex==0 || eMutex==1 || eMutex==2 );
326 assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
327 assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
328 assert( eMutex!=0 || (!mutex_held(0)) );
329 pthread_mutex_lock(&primitives.aMutex[eMutex]);
330 TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
331}
332static void async_mutex_leave(int eMutex){
333 assert( eMutex==0 || eMutex==1 || eMutex==2 );
334 assert( mutex_held(eMutex) );
335 TESTONLY( primitives.aHolder[eMutex] = 0; )
336 pthread_mutex_unlock(&primitives.aMutex[eMutex]);
337}
338static void async_cond_wait(int eCond, int eMutex){
339 assert( eMutex==0 || eMutex==1 || eMutex==2 );
340 assert( mutex_held(eMutex) );
341 TESTONLY( primitives.aHolder[eMutex] = 0; )
342 pthread_cond_wait(&primitives.aCond[eCond], &primitives.aMutex[eMutex]);
343 TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
344}
345static void async_cond_signal(int eCond){
346 assert( mutex_held(ASYNC_MUTEX_QUEUE) );
347 pthread_cond_signal(&primitives.aCond[eCond]);
348}
/*
** pthreads implementation: offer the CPU to another thread. The result
** of sched_yield() is deliberately ignored.
*/
static void async_sched_yield(void){
  (void)sched_yield();
}
352#endif
353/*
354** End of OS specific code.
355*************************************************************************/
356
357#define assert_mutex_is_held(X) assert( mutex_held(X) )
358
359
360#ifndef SQLITE_ASYNC_TWO_FILEHANDLES
361/* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
362#define SQLITE_ASYNC_TWO_FILEHANDLES 1
363#endif
364
365/*
366** State information is held in the static variable "async" defined
367** as the following structure.
368**
369** Both async.ioError and async.nFile are protected by async.queueMutex.
370*/
371static struct TestAsyncStaticData {
372 AsyncWrite *pQueueFirst; /* Next write operation to be processed */
373 AsyncWrite *pQueueLast; /* Last write operation on the list */
374 AsyncLock *pLock; /* Linked list of all AsyncLock structures */
375 volatile int ioDelay; /* Extra delay between write operations */
376 volatile int eHalt; /* One of the SQLITEASYNC_HALT_XXX values */
377 int ioError; /* True if an IO error has occurred */
378 int nFile; /* Number of open files (from sqlite pov) */
379} async = { 0,0,0,0,0,0,0 };
380
381/* Possible values of AsyncWrite.op */
382#define ASYNC_NOOP 0
383#define ASYNC_WRITE 1
384#define ASYNC_SYNC 2
385#define ASYNC_TRUNCATE 3
386#define ASYNC_CLOSE 4
387#define ASYNC_DELETE 5
388#define ASYNC_OPENEXCLUSIVE 6
389#define ASYNC_UNLOCK 7
390
/* Names of opcodes, indexed by the ASYNC_XXX opcode value. Used for
** debugging (trace output) only. Make sure these stay in sync with the
** ASYNC_XXX macros!
**
** Both the strings and the table of pointers are constant, so the whole
** table is read-only.
*/
static const char *const azOpcodeName[] = {
  "NOOP",       /* 0 */
  "WRITE",      /* 1 */
  "SYNC",       /* 2 */
  "TRUNCATE",   /* 3 */
  "CLOSE",      /* 4 */
  "DELETE",     /* 5 */
  "OPENEX",     /* 6 */
  "UNLOCK"      /* 7 */
};
397
398/*
399** Entries on the write-op queue are instances of the AsyncWrite
400** structure, defined here.
401**
402** The interpretation of the iOffset and nByte variables varies depending
403** on the value of AsyncWrite.op:
404**
405** ASYNC_NOOP:
406** No values used.
407**
408** ASYNC_WRITE:
409** iOffset -> Offset in file to write to.
410** nByte -> Number of bytes of data to write (pointed to by zBuf).
411**
412** ASYNC_SYNC:
413** nByte -> flags to pass to sqlite3OsSync().
414**
415** ASYNC_TRUNCATE:
416** iOffset -> Size to truncate file to.
417** nByte -> Unused.
418**
419** ASYNC_CLOSE:
420** iOffset -> Unused.
421** nByte -> Unused.
422**
423** ASYNC_DELETE:
424** iOffset -> Contains the "syncDir" flag.
425** nByte -> Number of bytes of zBuf points to (file name).
426**
427** ASYNC_OPENEXCLUSIVE:
428** iOffset -> Value of "delflag".
429** nByte -> Number of bytes of zBuf points to (file name).
430**
431** ASYNC_UNLOCK:
432** nByte -> Argument to sqlite3OsUnlock().
433**
434**
435** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
436** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
437** single blob, so is deleted when sqlite3_free() is called on the parent
438** structure.
439*/
440struct AsyncWrite {
441 AsyncFileData *pFileData; /* File to write data to or sync */
442 int op; /* One of ASYNC_xxx etc. */
443 sqlite_int64 iOffset; /* See above */
444 int nByte; /* See above */
445 char *zBuf; /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
446 AsyncWrite *pNext; /* Next write operation (to any file) */
447};
448
449/*
** An instance of this structure is created for each distinct open file
** (i.e. if two handles are opened on the one file, only one of these
** structures is allocated) and stored in the linked list headed by
** async.pLock. Entries are looked up by the full pathname of the opened
** file (see findLock()).
454**
455** AsyncLock.pList points to the head of a linked list of AsyncFileLock
456** structures, one for each handle currently open on the file.
457**
458** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
459** not passed to the sqlite3OsOpen() call), or if ENABLE_FILE_LOCKING is
460** not defined at compile time, variables AsyncLock.pFile and
461** AsyncLock.eLock are never used. Otherwise, pFile is a file handle
462** opened on the file in question and used to obtain the file-system
463** locks required by database connections within this process.
464**
465** See comments above the asyncLock() function for more details on
466** the implementation of database locking used by this backend.
467*/
468struct AsyncLock {
469 char *zFile;
470 int nFile;
471 sqlite3_file *pFile;
472 int eLock;
473 AsyncFileLock *pList;
474 AsyncLock *pNext; /* Next in linked list headed by async.pLock */
475};
476
477/*
478** An instance of the following structure is allocated along with each
479** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
480** file was opened with the SQLITE_OPEN_MAIN_DB.
481*/
482struct AsyncFileLock {
483 int eLock; /* Internally visible lock state (sqlite pov) */
484 int eAsyncLock; /* Lock-state with write-queue unlock */
485 AsyncFileLock *pNext;
486};
487
488/*
489** The AsyncFile structure is a subclass of sqlite3_file used for
490** asynchronous IO.
491**
492** All of the actual data for the structure is stored in the structure
493** pointed to by AsyncFile.pData, which is allocated as part of the
494** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
495** lifetime of the AsyncFile structure is ended by the caller after OsClose()
496** is called, but the data in AsyncFileData may be required by the
497** writer thread after that point.
498*/
499struct AsyncFile {
500 sqlite3_io_methods *pMethod;
501 AsyncFileData *pData;
502};
503struct AsyncFileData {
504 char *zName; /* Underlying OS filename - used for debugging */
505 int nName; /* Number of characters in zName */
506 sqlite3_file *pBaseRead; /* Read handle to the underlying Os file */
507 sqlite3_file *pBaseWrite; /* Write handle to the underlying Os file */
508 AsyncFileLock lock; /* Lock state for this handle */
509 AsyncLock *pLock; /* AsyncLock object for this file system entry */
510 AsyncWrite closeOp; /* Preallocated close operation */
511};
512
513/*
514** Add an entry to the end of the global write-op list. pWrite should point
515** to an AsyncWrite structure allocated using sqlite3_malloc(). The writer
516** thread will call sqlite3_free() to free the structure after the specified
517** operation has been completed.
518**
519** Once an AsyncWrite structure has been added to the list, it becomes the
520** property of the writer thread and must not be read or modified by the
521** caller.
522*/
523static void addAsyncWrite(AsyncWrite *pWrite){
524 /* We must hold the queue mutex in order to modify the queue pointers */
525 if( pWrite->op!=ASYNC_UNLOCK ){
526 async_mutex_enter(ASYNC_MUTEX_QUEUE);
527 }
528
529 /* Add the record to the end of the write-op queue */
530 assert( !pWrite->pNext );
531 if( async.pQueueLast ){
532 assert( async.pQueueFirst );
533 async.pQueueLast->pNext = pWrite;
534 }else{
535 async.pQueueFirst = pWrite;
536 }
537 async.pQueueLast = pWrite;
538 ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
539 pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
540
541 if( pWrite->op==ASYNC_CLOSE ){
542 async.nFile--;
543 }
544
545 /* The writer thread might have been idle because there was nothing
546 ** on the write-op queue for it to do. So wake it up. */
547 async_cond_signal(ASYNC_COND_QUEUE);
548
549 /* Drop the queue mutex */
550 if( pWrite->op!=ASYNC_UNLOCK ){
551 async_mutex_leave(ASYNC_MUTEX_QUEUE);
552 }
553}
554
555/*
556** Increment async.nFile in a thread-safe manner.
557*/
558static void incrOpenFileCount(void){
559 /* We must hold the queue mutex in order to modify async.nFile */
560 async_mutex_enter(ASYNC_MUTEX_QUEUE);
561 if( async.nFile==0 ){
562 async.ioError = SQLITE_OK;
563 }
564 async.nFile++;
565 async_mutex_leave(ASYNC_MUTEX_QUEUE);
566}
567
568/*
569** This is a utility function to allocate and populate a new AsyncWrite
570** structure and insert it (via addAsyncWrite() ) into the global list.
571*/
572static int addNewAsyncWrite(
573 AsyncFileData *pFileData,
574 int op,
575 sqlite3_int64 iOffset,
576 int nByte,
577 const char *zByte
578){
579 AsyncWrite *p;
580 if( op!=ASYNC_CLOSE && async.ioError ){
581 return async.ioError;
582 }
583 p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
584 if( !p ){
585 /* The upper layer does not expect operations like OsWrite() to
586 ** return SQLITE_NOMEM. This is partly because under normal conditions
587 ** SQLite is required to do rollback without calling malloc(). So
588 ** if malloc() fails here, treat it as an I/O error. The above
589 ** layer knows how to handle that.
590 */
591 return SQLITE_IOERR;
592 }
593 p->op = op;
594 p->iOffset = iOffset;
595 p->nByte = nByte;
596 p->pFileData = pFileData;
597 p->pNext = 0;
598 if( zByte ){
599 p->zBuf = (char *)&p[1];
600 memcpy(p->zBuf, zByte, nByte);
601 }else{
602 p->zBuf = 0;
603 }
604 addAsyncWrite(p);
605 return SQLITE_OK;
606}
607
608/*
609** Close the file. This just adds an entry to the write-op list, the file is
610** not actually closed.
611*/
612static int asyncClose(sqlite3_file *pFile){
613 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
614
615 /* Unlock the file, if it is locked */
616 async_mutex_enter(ASYNC_MUTEX_LOCK);
617 p->lock.eLock = 0;
618 async_mutex_leave(ASYNC_MUTEX_LOCK);
619
620 addAsyncWrite(&p->closeOp);
621 return SQLITE_OK;
622}
623
624/*
625** Implementation of sqlite3OsWrite() for asynchronous files. Instead of
626** writing to the underlying file, this function adds an entry to the end of
627** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be
628** returned.
629*/
630static int asyncWrite(
631 sqlite3_file *pFile,
632 const void *pBuf,
633 int amt,
634 sqlite3_int64 iOff
635){
636 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
637 return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
638}
639
640/*
641** Read data from the file. First we read from the filesystem, then adjust
642** the contents of the buffer based on ASYNC_WRITE operations in the
643** write-op queue.
644**
645** This method holds the mutex from start to finish.
646*/
647static int asyncRead(
648 sqlite3_file *pFile,
649 void *zOut,
650 int iAmt,
651 sqlite3_int64 iOffset
652){
653 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
654 int rc = SQLITE_OK;
655 sqlite3_int64 filesize;
656 int nRead;
657 sqlite3_file *pBase = p->pBaseRead;
658
659 /* Grab the write queue mutex for the duration of the call */
660 async_mutex_enter(ASYNC_MUTEX_QUEUE);
661
662 /* If an I/O error has previously occurred in this virtual file
663 ** system, then all subsequent operations fail.
664 */
665 if( async.ioError!=SQLITE_OK ){
666 rc = async.ioError;
667 goto asyncread_out;
668 }
669
670 if( pBase->pMethods ){
671 rc = pBase->pMethods->xFileSize(pBase, &filesize);
672 if( rc!=SQLITE_OK ){
673 goto asyncread_out;
674 }
675 nRead = MIN(filesize - iOffset, iAmt);
676 if( nRead>0 ){
677 rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset);
678 ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
679 }
680 }
681
682 if( rc==SQLITE_OK ){
683 AsyncWrite *pWrite;
684 char *zName = p->zName;
685
686 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
687 if( pWrite->op==ASYNC_WRITE && (
688 (pWrite->pFileData==p) ||
689 (zName && pWrite->pFileData->zName==zName)
690 )){
691 int iBeginOut = (pWrite->iOffset-iOffset);
692 int iBeginIn = -iBeginOut;
693 int nCopy;
694
695 if( iBeginIn<0 ) iBeginIn = 0;
696 if( iBeginOut<0 ) iBeginOut = 0;
697 nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut);
698
699 if( nCopy>0 ){
700 memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy);
701 ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
702 }
703 }
704 }
705 }
706
707asyncread_out:
708 async_mutex_leave(ASYNC_MUTEX_QUEUE);
709 return rc;
710}
711
712/*
713** Truncate the file to nByte bytes in length. This just adds an entry to
714** the write-op list, no IO actually takes place.
715*/
716static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){
717 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
718 return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
719}
720
721/*
722** Sync the file. This just adds an entry to the write-op list, the
723** sync() is done later by sqlite3_async_flush().
724*/
725static int asyncSync(sqlite3_file *pFile, int flags){
726 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
727 return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
728}
729
730/*
731** Read the size of the file. First we read the size of the file system
732** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
733** currently in the write-op list.
734**
735** This method holds the mutex from start to finish.
736*/
737int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
738 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
739 int rc = SQLITE_OK;
740 sqlite3_int64 s = 0;
741 sqlite3_file *pBase;
742
743 async_mutex_enter(ASYNC_MUTEX_QUEUE);
744
745 /* Read the filesystem size from the base file. If pBaseRead is NULL, this
746 ** means the file hasn't been opened yet. In this case all relevant data
747 ** must be in the write-op queue anyway, so we can omit reading from the
748 ** file-system.
749 */
750 pBase = p->pBaseRead;
751 if( pBase->pMethods ){
752 rc = pBase->pMethods->xFileSize(pBase, &s);
753 }
754
755 if( rc==SQLITE_OK ){
756 AsyncWrite *pWrite;
757 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
758 if( pWrite->op==ASYNC_DELETE
759 && p->zName
760 && strcmp(p->zName, pWrite->zBuf)==0
761 ){
762 s = 0;
763 }else if( pWrite->pFileData && (
764 (pWrite->pFileData==p)
765 || (p->zName && pWrite->pFileData->zName==p->zName)
766 )){
767 switch( pWrite->op ){
768 case ASYNC_WRITE:
769 s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s);
770 break;
771 case ASYNC_TRUNCATE:
772 s = MIN(s, pWrite->iOffset);
773 break;
774 }
775 }
776 }
777 *piSize = s;
778 }
779 async_mutex_leave(ASYNC_MUTEX_QUEUE);
780 return rc;
781}
782
783/*
784** Lock or unlock the actual file-system entry.
785*/
786static int getFileLock(AsyncLock *pLock){
787 int rc = SQLITE_OK;
788 AsyncFileLock *pIter;
789 int eRequired = 0;
790
791 if( pLock->pFile ){
792 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
793 assert(pIter->eAsyncLock>=pIter->eLock);
794 if( pIter->eAsyncLock>eRequired ){
795 eRequired = pIter->eAsyncLock;
796 assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
797 }
798 }
799
800 if( eRequired>pLock->eLock ){
801 rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired);
802 if( rc==SQLITE_OK ){
803 pLock->eLock = eRequired;
804 }
805 }
806 else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
807 rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired);
808 if( rc==SQLITE_OK ){
809 pLock->eLock = eRequired;
810 }
811 }
812 }
813
814 return rc;
815}
816
817/*
818** Return the AsyncLock structure from the global async.pLock list
819** associated with the file-system entry identified by path zName
820** (a string of nName bytes). If no such structure exists, return 0.
821*/
822static AsyncLock *findLock(const char *zName, int nName){
823 AsyncLock *p = async.pLock;
824 while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){
825 p = p->pNext;
826 }
827 return p;
828}
829
830/*
831** The following two methods - asyncLock() and asyncUnlock() - are used
832** to obtain and release locks on database files opened with the
833** asynchronous backend.
834*/
835static int asyncLock(sqlite3_file *pFile, int eLock){
836 int rc = SQLITE_OK;
837 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
838
839 if( p->zName ){
840 async_mutex_enter(ASYNC_MUTEX_LOCK);
841 if( p->lock.eLock<eLock ){
842 AsyncLock *pLock = p->pLock;
843 AsyncFileLock *pIter;
844 assert(pLock && pLock->pList);
845 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
846 if( pIter!=&p->lock && (
847 (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
848 (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
849 (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
850 (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
851 )){
852 rc = SQLITE_BUSY;
853 }
854 }
855 if( rc==SQLITE_OK ){
856 p->lock.eLock = eLock;
857 p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
858 }
859 assert(p->lock.eAsyncLock>=p->lock.eLock);
860 if( rc==SQLITE_OK ){
861 rc = getFileLock(pLock);
862 }
863 }
864 async_mutex_leave(ASYNC_MUTEX_LOCK);
865 }
866
867 ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
868 return rc;
869}
870static int asyncUnlock(sqlite3_file *pFile, int eLock){
871 int rc = SQLITE_OK;
872 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
873 if( p->zName ){
874 AsyncFileLock *pLock = &p->lock;
875 async_mutex_enter(ASYNC_MUTEX_QUEUE);
876 async_mutex_enter(ASYNC_MUTEX_LOCK);
877 pLock->eLock = MIN(pLock->eLock, eLock);
878 rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
879 async_mutex_leave(ASYNC_MUTEX_LOCK);
880 async_mutex_leave(ASYNC_MUTEX_QUEUE);
881 }
882 return rc;
883}
884
885/*
886** This function is called when the pager layer first opens a database file
887** and is checking for a hot-journal.
888*/
889static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
890 int ret = 0;
891 AsyncFileLock *pIter;
892 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
893
894 async_mutex_enter(ASYNC_MUTEX_LOCK);
895 for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){
896 if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
897 ret = 1;
898 }
899 }
900 async_mutex_leave(ASYNC_MUTEX_LOCK);
901
902 ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
903 *pResOut = ret;
904 return SQLITE_OK;
905}
906
907/*
908** sqlite3_file_control() implementation.
909*/
910static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
911 switch( op ){
912 case SQLITE_FCNTL_LOCKSTATE: {
913 async_mutex_enter(ASYNC_MUTEX_LOCK);
914 *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
915 async_mutex_leave(ASYNC_MUTEX_LOCK);
916 return SQLITE_OK;
917 }
918 }
919 return SQLITE_ERROR;
920}
921
/*
** Return the device characteristics and sector-size of the device. It
** is tricky to implement these correctly, as this backend might
** not have an open file handle at this point.
*/
927static int asyncSectorSize(sqlite3_file *pFile){
928 return 512;
929}
930static int asyncDeviceCharacteristics(sqlite3_file *pFile){
931 return 0;
932}
933
934static int unlinkAsyncFile(AsyncFileData *pData){
935 AsyncFileLock **ppIter;
936 int rc = SQLITE_OK;
937
938 if( pData->zName ){
939 AsyncLock *pLock = pData->pLock;
940 for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){
941 if( (*ppIter)==&pData->lock ){
942 *ppIter = pData->lock.pNext;
943 break;
944 }
945 }
946 if( !pLock->pList ){
947 AsyncLock **pp;
948 if( pLock->pFile ){
949 pLock->pFile->pMethods->xClose(pLock->pFile);
950 }
951 for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext));
952 *pp = pLock->pNext;
953 sqlite3_free(pLock);
954 }else{
955 rc = getFileLock(pLock);
956 }
957 }
958
959 return rc;
960}
961
962/*
963** The parameter passed to this function is a copy of a 'flags' parameter
964** passed to this modules xOpen() method. This function returns true
965** if the file should be opened asynchronously, or false if it should
966** be opened immediately.
967**
968** If the file is to be opened asynchronously, then asyncOpen() will add
969** an entry to the event queue and the file will not actually be opened
970** until the event is processed. Otherwise, the file is opened directly
971** by the caller.
972*/
973static int doAsynchronousOpen(int flags){
974 return (flags&SQLITE_OPEN_CREATE) && (
975 (flags&SQLITE_OPEN_MAIN_JOURNAL) ||
976 (flags&SQLITE_OPEN_TEMP_JOURNAL) ||
977 (flags&SQLITE_OPEN_DELETEONCLOSE)
978 );
979}
980
981/*
982** Open a file.
983*/
984static int asyncOpen(
985 sqlite3_vfs *pAsyncVfs,
986 const char *zName,
987 sqlite3_file *pFile,
988 int flags,
989 int *pOutFlags
990){
991 static sqlite3_io_methods async_methods = {
992 1, /* iVersion */
993 asyncClose, /* xClose */
994 asyncRead, /* xRead */
995 asyncWrite, /* xWrite */
996 asyncTruncate, /* xTruncate */
997 asyncSync, /* xSync */
998 asyncFileSize, /* xFileSize */
999 asyncLock, /* xLock */
1000 asyncUnlock, /* xUnlock */
1001 asyncCheckReservedLock, /* xCheckReservedLock */
1002 asyncFileControl, /* xFileControl */
1003 asyncSectorSize, /* xSectorSize */
1004 asyncDeviceCharacteristics /* xDeviceCharacteristics */
1005 };
1006
1007 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1008 AsyncFile *p = (AsyncFile *)pFile;
1009 int nName = 0;
1010 int rc = SQLITE_OK;
1011 int nByte;
1012 AsyncFileData *pData;
1013 AsyncLock *pLock = 0;
1014 char *z;
1015 int isAsyncOpen = doAsynchronousOpen(flags);
1016
1017 /* If zName is NULL, then the upper layer is requesting an anonymous file */
1018 if( zName ){
1019 nName = strlen(zName)+1;
1020 }
1021
1022 nByte = (
1023 sizeof(AsyncFileData) + /* AsyncFileData structure */
1024 2 * pVfs->szOsFile + /* AsyncFileData.pBaseRead and pBaseWrite */
1025 nName /* AsyncFileData.zName */
1026 );
1027 z = sqlite3_malloc(nByte);
1028 if( !z ){
1029 return SQLITE_NOMEM;
1030 }
1031 memset(z, 0, nByte);
1032 pData = (AsyncFileData*)z;
1033 z += sizeof(pData[0]);
1034 pData->pBaseRead = (sqlite3_file*)z;
1035 z += pVfs->szOsFile;
1036 pData->pBaseWrite = (sqlite3_file*)z;
1037 pData->closeOp.pFileData = pData;
1038 pData->closeOp.op = ASYNC_CLOSE;
1039
1040 if( zName ){
1041 z += pVfs->szOsFile;
1042 pData->zName = z;
1043 pData->nName = nName;
1044 memcpy(pData->zName, zName, nName);
1045 }
1046
1047 if( !isAsyncOpen ){
1048 int flagsout;
1049 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, &flagsout);
1050 if( rc==SQLITE_OK && (flagsout&SQLITE_OPEN_READWRITE) ){
1051 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseWrite, flags, 0);
1052 }
1053 if( pOutFlags ){
1054 *pOutFlags = flagsout;
1055 }
1056 }
1057
1058 async_mutex_enter(ASYNC_MUTEX_LOCK);
1059
1060 if( zName && rc==SQLITE_OK ){
1061 pLock = findLock(pData->zName, pData->nName);
1062 if( !pLock ){
1063 int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1;
1064 pLock = (AsyncLock *)sqlite3_malloc(nByte);
1065 if( pLock ){
1066 memset(pLock, 0, nByte);
1067#ifdef ENABLE_FILE_LOCKING
1068 if( flags&SQLITE_OPEN_MAIN_DB ){
1069 pLock->pFile = (sqlite3_file *)&pLock[1];
1070 rc = pVfs->xOpen(pVfs, pData->zName, pLock->pFile, flags, 0);
1071 if( rc!=SQLITE_OK ){
1072 sqlite3_free(pLock);
1073 pLock = 0;
1074 }
1075 }
1076#endif
1077 if( pLock ){
1078 pLock->nFile = pData->nName;
1079 pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile];
1080 memcpy(pLock->zFile, pData->zName, pLock->nFile);
1081 pLock->pNext = async.pLock;
1082 async.pLock = pLock;
1083 }
1084 }else{
1085 rc = SQLITE_NOMEM;
1086 }
1087 }
1088 }
1089
1090 if( rc==SQLITE_OK ){
1091 p->pMethod = &async_methods;
1092 p->pData = pData;
1093
1094 /* Link AsyncFileData.lock into the linked list of
1095 ** AsyncFileLock structures for this file.
1096 */
1097 if( zName ){
1098 pData->lock.pNext = pLock->pList;
1099 pLock->pList = &pData->lock;
1100 pData->zName = pLock->zFile;
1101 }
1102 }else{
1103 if( pData->pBaseRead->pMethods ){
1104 pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1105 }
1106 if( pData->pBaseWrite->pMethods ){
1107 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1108 }
1109 sqlite3_free(pData);
1110 }
1111
1112 async_mutex_leave(ASYNC_MUTEX_LOCK);
1113
1114 if( rc==SQLITE_OK ){
1115 incrOpenFileCount();
1116 pData->pLock = pLock;
1117 }
1118
1119 if( rc==SQLITE_OK && isAsyncOpen ){
1120 rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0);
1121 if( rc==SQLITE_OK ){
1122 if( pOutFlags ) *pOutFlags = flags;
1123 }else{
1124 async_mutex_enter(ASYNC_MUTEX_LOCK);
1125 unlinkAsyncFile(pData);
1126 async_mutex_leave(ASYNC_MUTEX_LOCK);
1127 sqlite3_free(pData);
1128 }
1129 }
1130 if( rc!=SQLITE_OK ){
1131 p->pMethod = 0;
1132 }
1133 return rc;
1134}
1135
1136/*
1137** Implementation of sqlite3OsDelete. Add an entry to the end of the
1138** write-op queue to perform the delete.
1139*/
1140static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
1141 return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z);
1142}
1143
1144/*
1145** Implementation of sqlite3OsAccess. This method holds the mutex from
1146** start to finish.
1147*/
1148static int asyncAccess(
1149 sqlite3_vfs *pAsyncVfs,
1150 const char *zName,
1151 int flags,
1152 int *pResOut
1153){
1154 int rc;
1155 int ret;
1156 AsyncWrite *p;
1157 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1158
1159 assert(flags==SQLITE_ACCESS_READWRITE
1160 || flags==SQLITE_ACCESS_READ
1161 || flags==SQLITE_ACCESS_EXISTS
1162 );
1163
1164 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1165 rc = pVfs->xAccess(pVfs, zName, flags, &ret);
1166 if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){
1167 for(p=async.pQueueFirst; p; p = p->pNext){
1168 if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
1169 ret = 0;
1170 }else if( p->op==ASYNC_OPENEXCLUSIVE
1171 && p->pFileData->zName
1172 && 0==strcmp(p->pFileData->zName, zName)
1173 ){
1174 ret = 1;
1175 }
1176 }
1177 }
1178 ASYNC_TRACE(("ACCESS(%s): %s = %d\n",
1179 flags==SQLITE_ACCESS_READWRITE?"read-write":
1180 flags==SQLITE_ACCESS_READ?"read":"exists"
1181 , zName, ret)
1182 );
1183 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1184 *pResOut = ret;
1185 return rc;
1186}
1187
1188/*
1189** Fill in zPathOut with the full path to the file identified by zPath.
1190*/
1191static int asyncFullPathname(
1192 sqlite3_vfs *pAsyncVfs,
1193 const char *zPath,
1194 int nPathOut,
1195 char *zPathOut
1196){
1197 int rc;
1198 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1199 rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
1200
1201 /* Because of the way intra-process file locking works, this backend
1202 ** needs to return a canonical path. The following block assumes the
1203 ** file-system uses unix style paths.
1204 */
1205 if( rc==SQLITE_OK ){
1206 int i, j;
1207 int n = nPathOut;
1208 char *z = zPathOut;
1209 while( n>1 && z[n-1]=='/' ){ n--; }
1210 for(i=j=0; i<n; i++){
1211 if( z[i]=='/' ){
1212 if( z[i+1]=='/' ) continue;
1213 if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
1214 i += 1;
1215 continue;
1216 }
1217 if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
1218 while( j>0 && z[j-1]!='/' ){ j--; }
1219 if( j>0 ){ j--; }
1220 i += 2;
1221 continue;
1222 }
1223 }
1224 z[j++] = z[i];
1225 }
1226 z[j] = 0;
1227 }
1228
1229 return rc;
1230}
1231static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
1232 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1233 return pVfs->xDlOpen(pVfs, zPath);
1234}
1235static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
1236 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1237 pVfs->xDlError(pVfs, nByte, zErrMsg);
1238}
1239static void (*asyncDlSym(
1240 sqlite3_vfs *pAsyncVfs,
1241 void *pHandle,
1242 const char *zSymbol
1243))(void){
1244 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1245 return pVfs->xDlSym(pVfs, pHandle, zSymbol);
1246}
1247static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
1248 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1249 pVfs->xDlClose(pVfs, pHandle);
1250}
1251static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
1252 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1253 return pVfs->xRandomness(pVfs, nByte, zBufOut);
1254}
1255static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
1256 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1257 return pVfs->xSleep(pVfs, nMicro);
1258}
1259static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
1260 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1261 return pVfs->xCurrentTime(pVfs, pTimeOut);
1262}
1263
/*
** The sqlite3_vfs object for the asynchronous backend. The mxPathname and
** pAppData slots start out as 0; sqlite3async_initialize() fills them in
** with the parent VFS's mxPathname and a pointer to the parent VFS.
*/
static sqlite3_vfs async_vfs = {
  1,                    /* iVersion */
  sizeof(AsyncFile),    /* szOsFile */
  0,                    /* mxPathname */
  0,                    /* pNext */
  SQLITEASYNC_VFSNAME,  /* zName */
  0,                    /* pAppData */
  asyncOpen,            /* xOpen */
  asyncDelete,          /* xDelete */
  asyncAccess,          /* xAccess */
  asyncFullPathname,    /* xFullPathname */
  asyncDlOpen,          /* xDlOpen */
  asyncDlError,         /* xDlError */
  asyncDlSym,           /* xDlSym */
  asyncDlClose,         /* xDlClose */
  asyncRandomness,      /* xRandomness */
  asyncSleep,           /* xSleep */
  asyncCurrentTime      /* xCurrentTime */
};
1283
1284/*
1285** This procedure runs in a separate thread, reading messages off of the
1286** write queue and processing them one by one.
1287**
1288** If async.writerHaltNow is true, then this procedure exits
1289** after processing a single message.
1290**
1291** If async.writerHaltWhenIdle is true, then this procedure exits when
1292** the write queue is empty.
1293**
1294** If both of the above variables are false, this procedure runs
1295** indefinately, waiting for operations to be added to the write queue
1296** and processing them in the order in which they arrive.
1297**
1298** An artifical delay of async.ioDelay milliseconds is inserted before
1299** each write operation in order to simulate the effect of a slow disk.
1300**
1301** Only one instance of this procedure may be running at a time.
1302*/
1303static void asyncWriterThread(void){
1304 sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
1305 AsyncWrite *p = 0;
1306 int rc = SQLITE_OK;
1307 int holdingMutex = 0;
1308
1309 async_mutex_enter(ASYNC_MUTEX_WRITER);
1310
1311 while( async.eHalt!=SQLITEASYNC_HALT_NOW ){
1312 int doNotFree = 0;
1313 sqlite3_file *pBase = 0;
1314
1315 if( !holdingMutex ){
1316 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1317 }
1318 while( (p = async.pQueueFirst)==0 ){
1319 if( async.eHalt!=SQLITEASYNC_HALT_NEVER ){
1320 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1321 break;
1322 }else{
1323 ASYNC_TRACE(("IDLE\n"));
1324 async_cond_wait(ASYNC_COND_QUEUE, ASYNC_MUTEX_QUEUE);
1325 ASYNC_TRACE(("WAKEUP\n"));
1326 }
1327 }
1328 if( p==0 ) break;
1329 holdingMutex = 1;
1330
1331 /* Right now this thread is holding the mutex on the write-op queue.
1332 ** Variable 'p' points to the first entry in the write-op queue. In
1333 ** the general case, we hold on to the mutex for the entire body of
1334 ** the loop.
1335 **
1336 ** However in the cases enumerated below, we relinquish the mutex,
1337 ** perform the IO, and then re-request the mutex before removing 'p' from
1338 ** the head of the write-op queue. The idea is to increase concurrency with
1339 ** sqlite threads.
1340 **
1341 ** * An ASYNC_CLOSE operation.
1342 ** * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish
1343 ** the mutex, call the underlying xOpenExclusive() function, then
1344 ** re-aquire the mutex before seting the AsyncFile.pBaseRead
1345 ** variable.
1346 ** * ASYNC_SYNC and ASYNC_WRITE operations, if
1347 ** SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
1348 ** file-handles are open for the particular file being "synced".
1349 */
1350 if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
1351 p->op = ASYNC_NOOP;
1352 }
1353 if( p->pFileData ){
1354 pBase = p->pFileData->pBaseWrite;
1355 if(
1356 p->op==ASYNC_CLOSE ||
1357 p->op==ASYNC_OPENEXCLUSIVE ||
1358 (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) )
1359 ){
1360 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1361 holdingMutex = 0;
1362 }
1363 if( !pBase->pMethods ){
1364 pBase = p->pFileData->pBaseRead;
1365 }
1366 }
1367
1368 switch( p->op ){
1369 case ASYNC_NOOP:
1370 break;
1371
1372 case ASYNC_WRITE:
1373 assert( pBase );
1374 ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
1375 p->pFileData->zName, p->nByte, p->iOffset));
1376 rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
1377 break;
1378
1379 case ASYNC_SYNC:
1380 assert( pBase );
1381 ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
1382 rc = pBase->pMethods->xSync(pBase, p->nByte);
1383 break;
1384
1385 case ASYNC_TRUNCATE:
1386 assert( pBase );
1387 ASYNC_TRACE(("TRUNCATE %s to %d bytes\n",
1388 p->pFileData->zName, p->iOffset));
1389 rc = pBase->pMethods->xTruncate(pBase, p->iOffset);
1390 break;
1391
1392 case ASYNC_CLOSE: {
1393 AsyncFileData *pData = p->pFileData;
1394 ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
1395 if( pData->pBaseWrite->pMethods ){
1396 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1397 }
1398 if( pData->pBaseRead->pMethods ){
1399 pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1400 }
1401
1402 /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock
1403 ** structures for this file. Obtain the async.lockMutex mutex
1404 ** before doing so.
1405 */
1406 async_mutex_enter(ASYNC_MUTEX_LOCK);
1407 rc = unlinkAsyncFile(pData);
1408 async_mutex_leave(ASYNC_MUTEX_LOCK);
1409
1410 if( !holdingMutex ){
1411 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1412 holdingMutex = 1;
1413 }
1414 assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1415 async.pQueueFirst = p->pNext;
1416 sqlite3_free(pData);
1417 doNotFree = 1;
1418 break;
1419 }
1420
1421 case ASYNC_UNLOCK: {
1422 AsyncWrite *pIter;
1423 AsyncFileData *pData = p->pFileData;
1424 int eLock = p->nByte;
1425
1426 /* When a file is locked by SQLite using the async backend, it is
1427 ** locked within the 'real' file-system synchronously. When it is
1428 ** unlocked, an ASYNC_UNLOCK event is added to the write-queue to
1429 ** unlock the file asynchronously. The design of the async backend
1430 ** requires that the 'real' file-system file be locked from the
1431 ** time that SQLite first locks it (and probably reads from it)
1432 ** until all asynchronous write events that were scheduled before
1433 ** SQLite unlocked the file have been processed.
1434 **
1435 ** This is more complex if SQLite locks and unlocks the file multiple
1436 ** times in quick succession. For example, if SQLite does:
1437 **
1438 ** lock, write, unlock, lock, write, unlock
1439 **
1440 ** Each "lock" operation locks the file immediately. Each "write"
1441 ** and "unlock" operation adds an event to the event queue. If the
1442 ** second "lock" operation is performed before the first "unlock"
1443 ** operation has been processed asynchronously, then the first
1444 ** "unlock" cannot be safely processed as is, since this would mean
1445 ** the file was unlocked when the second "write" operation is
1446 ** processed. To work around this, when processing an ASYNC_UNLOCK
1447 ** operation, SQLite:
1448 **
1449 ** 1) Unlocks the file to the minimum of the argument passed to
1450 ** the xUnlock() call and the current lock from SQLite's point
1451 ** of view, and
1452 **
1453 ** 2) Only unlocks the file at all if this event is the last
1454 ** ASYNC_UNLOCK event on this file in the write-queue.
1455 */
1456 assert( holdingMutex==1 );
1457 assert( async.pQueueFirst==p );
1458 for(pIter=async.pQueueFirst->pNext; pIter; pIter=pIter->pNext){
1459 if( pIter->pFileData==pData && pIter->op==ASYNC_UNLOCK ) break;
1460 }
1461 if( !pIter ){
1462 async_mutex_enter(ASYNC_MUTEX_LOCK);
1463 pData->lock.eAsyncLock = MIN(
1464 pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
1465 );
1466 assert(pData->lock.eAsyncLock>=pData->lock.eLock);
1467 rc = getFileLock(pData->pLock);
1468 async_mutex_leave(ASYNC_MUTEX_LOCK);
1469 }
1470 break;
1471 }
1472
1473 case ASYNC_DELETE:
1474 ASYNC_TRACE(("DELETE %s\n", p->zBuf));
1475 rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset);
1476 break;
1477
1478 case ASYNC_OPENEXCLUSIVE: {
1479 int flags = (int)p->iOffset;
1480 AsyncFileData *pData = p->pFileData;
1481 ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
1482 assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
1483 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
1484 assert( holdingMutex==0 );
1485 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1486 holdingMutex = 1;
1487 break;
1488 }
1489
1490 default: assert(!"Illegal value for AsyncWrite.op");
1491 }
1492
1493 /* If we didn't hang on to the mutex during the IO op, obtain it now
1494 ** so that the AsyncWrite structure can be safely removed from the
1495 ** global write-op queue.
1496 */
1497 if( !holdingMutex ){
1498 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1499 holdingMutex = 1;
1500 }
1501 /* ASYNC_TRACE(("UNLINK %p\n", p)); */
1502 if( p==async.pQueueLast ){
1503 async.pQueueLast = 0;
1504 }
1505 if( !doNotFree ){
1506 assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1507 async.pQueueFirst = p->pNext;
1508 sqlite3_free(p);
1509 }
1510 assert( holdingMutex );
1511
1512 /* An IO error has occurred. We cannot report the error back to the
1513 ** connection that requested the I/O since the error happened
1514 ** asynchronously. The connection has already moved on. There
1515 ** really is nobody to report the error to.
1516 **
1517 ** The file for which the error occurred may have been a database or
1518 ** journal file. Regardless, none of the currently queued operations
1519 ** associated with the same database should now be performed. Nor should
1520 ** any subsequently requested IO on either a database or journal file
1521 ** handle for the same database be accepted until the main database
1522 ** file handle has been closed and reopened.
1523 **
1524 ** Furthermore, no further IO should be queued or performed on any file
1525 ** handle associated with a database that may have been part of a
1526 ** multi-file transaction that included the database associated with
1527 ** the IO error (i.e. a database ATTACHed to the same handle at some
1528 ** point in time).
1529 */
1530 if( rc!=SQLITE_OK ){
1531 async.ioError = rc;
1532 }
1533
1534 if( async.ioError && !async.pQueueFirst ){
1535 async_mutex_enter(ASYNC_MUTEX_LOCK);
1536 if( 0==async.pLock ){
1537 async.ioError = SQLITE_OK;
1538 }
1539 async_mutex_leave(ASYNC_MUTEX_LOCK);
1540 }
1541
1542 /* Drop the queue mutex before continuing to the next write operation
1543 ** in order to give other threads a chance to work with the write queue.
1544 */
1545 if( !async.pQueueFirst || !async.ioError ){
1546 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1547 holdingMutex = 0;
1548 if( async.ioDelay>0 ){
1549 pVfs->xSleep(pVfs, async.ioDelay);
1550 }else{
1551 async_sched_yield();
1552 }
1553 }
1554 }
1555
1556 async_mutex_leave(ASYNC_MUTEX_WRITER);
1557 return;
1558}
1559
1560/*
1561** Install the asynchronous VFS.
1562*/
1563int sqlite3async_initialize(const char *zParent, int isDefault){
1564 int rc = SQLITE_OK;
1565 if( async_vfs.pAppData==0 ){
1566 sqlite3_vfs *pParent = sqlite3_vfs_find(zParent);
1567 if( !pParent || async_os_initialize() ){
1568 rc = SQLITE_ERROR;
1569 }else if( SQLITE_OK!=(rc = sqlite3_vfs_register(&async_vfs, isDefault)) ){
1570 async_os_shutdown();
1571 }else{
1572 async_vfs.pAppData = (void *)pParent;
1573 async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
1574 }
1575 }
1576 return rc;
1577}
1578
1579/*
1580** Uninstall the asynchronous VFS.
1581*/
1582void sqlite3async_shutdown(void){
1583 if( async_vfs.pAppData ){
1584 async_os_shutdown();
1585 sqlite3_vfs_unregister((sqlite3_vfs *)&async_vfs);
1586 async_vfs.pAppData = 0;
1587 }
1588}
1589
/*
** Process events on the write-queue. This is the public entry point for
** the writer loop; it returns when asyncWriterThread() returns.
*/
void sqlite3async_run(void){
  asyncWriterThread();
  return;
}
1596
1597/*
1598** Control/configure the asynchronous IO system.
1599*/
1600int sqlite3async_control(int op, ...){
1601 va_list ap;
1602 va_start(ap, op);
1603 switch( op ){
1604 case SQLITEASYNC_HALT: {
1605 int eWhen = va_arg(ap, int);
1606 if( eWhen!=SQLITEASYNC_HALT_NEVER
1607 && eWhen!=SQLITEASYNC_HALT_NOW
1608 && eWhen!=SQLITEASYNC_HALT_IDLE
1609 ){
1610 return SQLITE_ERROR;
1611 }
1612 async.eHalt = eWhen;
1613 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1614 async_cond_signal(ASYNC_COND_QUEUE);
1615 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1616 break;
1617 }
1618
1619 case SQLITEASYNC_DELAY: {
1620 int iDelay = va_arg(ap, int);
1621 async.ioDelay = iDelay;
1622 break;
1623 }
1624
1625 case SQLITEASYNC_GET_HALT: {
1626 int *peWhen = va_arg(ap, int *);
1627 *peWhen = async.eHalt;
1628 break;
1629 }
1630 case SQLITEASYNC_GET_DELAY: {
1631 int *piDelay = va_arg(ap, int *);
1632 *piDelay = async.ioDelay;
1633 break;
1634 }
1635
1636 default:
1637 return SQLITE_ERROR;
1638 }
1639 return SQLITE_OK;
1640}
1641
1642#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */
1643