blob: 1627f912fa8453c0d60ba9a787952f8c5b616fac [file] [log] [blame]
drh23669402006-01-09 17:29:52 +00001/*
2** 2005 December 14
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12**
13** This file contains an example implementation of an asynchronous IO
drhfe0f75b2006-01-10 20:01:18 +000014** backend for SQLite.
15**
16** WHAT IS ASYNCHRONOUS I/O?
17**
18** With asynchronous I/O, write requests are handled by a separate thread
19** running in the background. This means that the thread that initiates
20** a database write does not have to wait for (sometimes slow) disk I/O
21** to occur. The write seems to happen very quickly, though in reality
22** it is happening at its usual slow pace in the background.
23**
** Asynchronous I/O appears to give better responsiveness, but at a price.
** You lose the Durable property. With the default I/O backend of SQLite,
** once a write completes, you know that the information you wrote is
** safely on disk. With asynchronous I/O, this is not the case. If
** your program crashes or if you take a power loss after the database
** write but before the asynchronous write thread has completed, then the
** database change might never make it to disk and the next user of the
** database might not see your change.
32**
** You lose Durability with asynchronous I/O, but you still retain the
** other parts of ACID: Atomic, Consistent, and Isolated. Many
** applications get along fine without Durability.
36**
37** HOW IT WORKS
38**
** Asynchronous I/O works by overloading the OS-layer disk I/O routines
** with modified versions that store the data to be written in a queue of
** pending write operations. Look at the asyncEnable() subroutine to see
** how overloading works. Six os-layer routines are overloaded:
43**
44** sqlite3OsOpenReadWrite;
45** sqlite3OsOpenReadOnly;
46** sqlite3OsOpenExclusive;
47** sqlite3OsDelete;
48** sqlite3OsFileExists;
49** sqlite3OsSyncDirectory;
50**
51** The original implementations of these routines are saved and are
52** used by the writer thread to do the real I/O. The substitute
53** implementations typically put the I/O operation on a queue
54** to be handled later by the writer thread, though read operations
55** must be handled right away, obviously.
56**
57** Asynchronous I/O is disabled by setting the os-layer interface routines
58** back to their original values.
59**
60** LIMITATIONS
61**
62** This demonstration code is deliberately kept simple in order to keep
63** the main ideas clear and easy to understand. Real applications that
64** want to do asynchronous I/O might want to add additional capabilities.
65** For example, in this demonstration if writes are happening at a steady
66** stream that exceeds the I/O capability of the background writer thread,
67** the queue of pending write operations will grow without bound until we
68** run out of memory. Users of this technique may want to keep track of
69** the quantity of pending writes and stop accepting new write requests
70** when the buffer gets to be too big.
drh23669402006-01-09 17:29:52 +000071*/
72
73#include "sqliteInt.h"
drh23669402006-01-09 17:29:52 +000074#include <tcl.h>
75
drh23669402006-01-09 17:29:52 +000076/*
77** This test uses pthreads and hence only works on unix and with
danielk19770e87b702007-08-25 12:29:30 +000078** a threadsafe build of SQLite.
drh23669402006-01-09 17:29:52 +000079*/
danielk19770e87b702007-08-25 12:29:30 +000080#if OS_UNIX && SQLITE_THREADSAFE
drh23669402006-01-09 17:29:52 +000081
drhfe0f75b2006-01-10 20:01:18 +000082/*
83** This demo uses pthreads. If you do not have a pthreads implementation
84** for your operating system, you will need to recode the threading
85** logic.
86*/
drh23669402006-01-09 17:29:52 +000087#include <pthread.h>
88#include <sched.h>
89
/* Useful macros used in several places.  System headers may already
** provide MIN and MAX, so they are only defined here when missing.
** Each argument can be evaluated twice: do not pass expressions that
** have side effects. */
#ifndef MIN
# define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
# define MAX(x,y) ((x)>(y)?(x):(y))
#endif

/* Forward references */
typedef struct AsyncWrite AsyncWrite;
typedef struct AsyncFile AsyncFile;
typedef struct AsyncFileData AsyncFileData;
drh23669402006-01-09 17:29:52 +000098
/*
** Debug tracing.  When sqlite3async_trace is non-zero, every ASYNC_TRACE()
** invocation prints a message to stderr prefixed with the id of the
** calling thread.
**
** ASYNC_TRACE takes one fully parenthesized, printf-style argument list:
**
**     ASYNC_TRACE(("READ %d bytes\n", n));
**
** The do/while wrapper makes the macro expand to a single statement so it
** cannot capture a following "else".  The message is formatted with
** sqlite3_vmprintf().
*/
static int sqlite3async_trace = 0;
# define ASYNC_TRACE(X) do{ if( sqlite3async_trace ) asyncTrace X; }while(0)
static void asyncTrace(const char *zFormat, ...){
  char *z;
  va_list ap;
  va_start(ap, zFormat);
  z = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
  /* sqlite3_vmprintf() yields NULL when memory cannot be allocated.  Passing
  ** NULL to a %s conversion is undefined, so the message is dropped. */
  if( z ){
    fprintf(stderr, "[%d] %s", (int)pthread_self(), z);
    sqlite3_free(z);
  }
}
drh23669402006-01-09 17:29:52 +0000111
112/*
drh23669402006-01-09 17:29:52 +0000113** THREAD SAFETY NOTES
114**
115** Basic rules:
116**
117** * Both read and write access to the global write-op queue must be
118** protected by the async.queueMutex.
119**
120** * The file handles from the underlying system are assumed not to
121** be thread safe.
122**
drhfe0f75b2006-01-10 20:01:18 +0000123** * See the last two paragraphs under "The Writer Thread" for
drh23669402006-01-09 17:29:52 +0000124** an assumption to do with file-handle synchronization by the Os.
125**
126** File system operations (invoked by SQLite thread):
127**
128** xOpenXXX (three versions)
129** xDelete
130** xFileExists
131** xSyncDirectory
132**
133** File handle operations (invoked by SQLite thread):
134**
drh23669402006-01-09 17:29:52 +0000135** asyncWrite, asyncClose, asyncTruncate, asyncSync,
136** asyncSetFullSync, asyncOpenDirectory.
137**
drhfe0f75b2006-01-10 20:01:18 +0000138** The operations above add an entry to the global write-op list. They
139** prepare the entry, acquire the async.queueMutex momentarily while
140** list pointers are manipulated to insert the new entry, then release
141** the mutex and signal the writer thread to wake up in case it happens
142** to be asleep.
143**
drh23669402006-01-09 17:29:52 +0000144**
145** asyncRead, asyncFileSize.
drhfe0f75b2006-01-10 20:01:18 +0000146**
** Read operations. Both of these first read from the underlying file,
** then adjust their result based on pending writes in the
** write-op queue. So async.queueMutex is held for the duration
** of these operations to prevent other threads from changing the
** queue in mid operation.
152**
153**
154** asyncLock, asyncUnlock, asyncLockState, asyncCheckReservedLock
drh23669402006-01-09 17:29:52 +0000155**
drh89ea9312006-02-13 17:03:47 +0000156** These primitives implement in-process locking using a hash table
157** on the file name. Files are locked correctly for connections coming
158** from the same process. But other processes cannot see these locks
159** and will therefore not honor them.
drhfe0f75b2006-01-10 20:01:18 +0000160**
161**
162** asyncFileHandle.
drh23669402006-01-09 17:29:52 +0000163**
164** The sqlite3OsFileHandle() function is currently only used when
165** debugging the pager module. Unless sqlite3OsClose() is called on the
166** file (shouldn't be possible for other reasons), the underlying
167** implementations are safe to call without grabbing any mutex. So we just
drhfe0f75b2006-01-10 20:01:18 +0000168** go ahead and call it no matter what any other threads are doing.
drh23669402006-01-09 17:29:52 +0000169**
drhfe0f75b2006-01-10 20:01:18 +0000170**
171** asyncSeek.
drh23669402006-01-09 17:29:52 +0000172**
173** Calling this method just manipulates the AsyncFile.iOffset variable.
174** Since this variable is never accessed by writer thread, this
175** function does not require the mutex. Actual calls to OsSeek() take
176** place just before OsWrite() or OsRead(), which are always protected by
177** the mutex.
drh23669402006-01-09 17:29:52 +0000178**
179** The writer thread:
180**
** The async.writerMutex is used to make sure there is only
** a single writer thread running at a time.
183**
184** Inside the writer thread is a loop that works like this:
185**
186** WHILE (write-op list is not empty)
187** Do IO operation at head of write-op list
188** Remove entry from head of write-op list
189** END WHILE
190**
191** The async.queueMutex is always held during the <write-op list is
192** not empty> test, and when the entry is removed from the head
193** of the write-op list. Sometimes it is held for the interim
drhfe0f75b2006-01-10 20:01:18 +0000194** period (while the IO is performed), and sometimes it is
drh23669402006-01-09 17:29:52 +0000195** relinquished. It is relinquished if (a) the IO op is an
196** ASYNC_CLOSE or (b) when the file handle was opened, two of
197** the underlying systems handles were opened on the same
198** file-system entry.
199**
200** If condition (b) above is true, then one file-handle
201** (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
202** file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
203** threads to perform write() operations. This means that read
204** operations are not blocked by asynchronous writes (although
205** asynchronous writes may still be blocked by reads).
206**
207** This assumes that the OS keeps two handles open on the same file
208** properly in sync. That is, any read operation that starts after a
209** write operation on the same file system entry has completed returns
210** data consistent with the write. We also assume that if one thread
211** reads a file while another is writing it all bytes other than the
212** ones actually being written contain valid data.
213**
214** If the above assumptions are not true, set the preprocessor symbol
215** SQLITE_ASYNC_TWO_FILEHANDLES to 0.
216*/
217
/*
** SQLITE_ASYNC_TWO_FILEHANDLES defaults to 1.  It may be overridden at
** compile time; per the thread-safety notes in this file, define it as 0
** if the OS cannot be trusted to keep two handles on the same file in sync.
*/
#ifndef SQLITE_ASYNC_TWO_FILEHANDLES
# define SQLITE_ASYNC_TWO_FILEHANDLES 1
#endif
222
223/*
224** State information is held in the static variable "async" defined
225** as follows:
226*/
227static struct TestAsyncStaticData {
228 pthread_mutex_t queueMutex; /* Mutex for access to write operation queue */
229 pthread_mutex_t writerMutex; /* Prevents multiple writer threads */
drh89ea9312006-02-13 17:03:47 +0000230 pthread_mutex_t lockMutex; /* For access to aLock hash table */
drh23669402006-01-09 17:29:52 +0000231 pthread_cond_t queueSignal; /* For waking up sleeping writer thread */
232 pthread_cond_t emptySignal; /* Notify when the write queue is empty */
233 AsyncWrite *pQueueFirst; /* Next write operation to be processed */
234 AsyncWrite *pQueueLast; /* Last write operation on the list */
drh89ea9312006-02-13 17:03:47 +0000235 Hash aLock; /* Files locked */
drh23669402006-01-09 17:29:52 +0000236 volatile int ioDelay; /* Extra delay between write operations */
237 volatile int writerHaltWhenIdle; /* Writer thread halts when queue empty */
238 volatile int writerHaltNow; /* Writer thread halts after next op */
danielk1977be29bfc2006-02-14 13:25:43 +0000239 int ioError; /* True if an IO error has occured */
240 int nFile; /* Number of open files (from sqlite pov) */
drh23669402006-01-09 17:29:52 +0000241} async = {
242 PTHREAD_MUTEX_INITIALIZER,
243 PTHREAD_MUTEX_INITIALIZER,
drh89ea9312006-02-13 17:03:47 +0000244 PTHREAD_MUTEX_INITIALIZER,
drh23669402006-01-09 17:29:52 +0000245 PTHREAD_COND_INITIALIZER,
246 PTHREAD_COND_INITIALIZER,
247};
248
/* Possible values of AsyncWrite.op */
#define ASYNC_NOOP          0
#define ASYNC_WRITE         1
#define ASYNC_SYNC          2
#define ASYNC_TRUNCATE      3
#define ASYNC_CLOSE         4
#define ASYNC_OPENDIRECTORY 5
#define ASYNC_SETFULLSYNC   6
#define ASYNC_DELETE        7
#define ASYNC_OPENEXCLUSIVE 8
#define ASYNC_SYNCDIRECTORY 9

/* Names of opcodes, indexed by the ASYNC_xxx value.  Used for debugging
** only.  Make sure these stay in sync with the macros above!  Both the
** strings and the table itself are read-only.
*/
static const char *const azOpcodeName[] = {
  "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE",
  "OPENDIR", "SETFULLSYNC", "DELETE", "OPENEX", "SYNCDIR",
};
268
drh23669402006-01-09 17:29:52 +0000269/*
drhfe0f75b2006-01-10 20:01:18 +0000270** Entries on the write-op queue are instances of the AsyncWrite
271** structure, defined here.
272**
drh23669402006-01-09 17:29:52 +0000273** The interpretation of the iOffset and nByte variables varies depending
274** on the value of AsyncWrite.op:
275**
276** ASYNC_WRITE:
277** iOffset -> Offset in file to write to.
278** nByte -> Number of bytes of data to write (pointed to by zBuf).
279**
280** ASYNC_SYNC:
danielk19770e87b702007-08-25 12:29:30 +0000281** nByte -> flags to pass to sqlite3OsSync().
drh23669402006-01-09 17:29:52 +0000282**
283** ASYNC_TRUNCATE:
284** iOffset -> Size to truncate file to.
285** nByte -> Unused.
286**
287** ASYNC_CLOSE:
288** iOffset -> Unused.
289** nByte -> Unused.
290**
291** ASYNC_OPENDIRECTORY:
292** iOffset -> Unused.
293** nByte -> Number of bytes of zBuf points to (directory name).
294**
295** ASYNC_SETFULLSYNC:
296** iOffset -> Unused.
297** nByte -> New value for the full-sync flag.
298**
299**
300** ASYNC_DELETE:
danielk19770e87b702007-08-25 12:29:30 +0000301** iOffset -> Contains the "syncDir" flag.
drh23669402006-01-09 17:29:52 +0000302** nByte -> Number of bytes of zBuf points to (file name).
303**
304** ASYNC_OPENEXCLUSIVE:
305** iOffset -> Value of "delflag".
306** nByte -> Number of bytes of zBuf points to (file name).
307**
drh89ea9312006-02-13 17:03:47 +0000308**
drh23669402006-01-09 17:29:52 +0000309** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
drh17435752007-08-16 04:30:38 +0000310** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
311** single blob, so is deleted when sqlite3_free() is called on the parent
drh23669402006-01-09 17:29:52 +0000312** structure.
313*/
314struct AsyncWrite {
danielk19770e87b702007-08-25 12:29:30 +0000315 AsyncFileData *pFileData; /* File to write data to or sync */
316 int op; /* One of ASYNC_xxx etc. */
drh23669402006-01-09 17:29:52 +0000317 i64 iOffset; /* See above */
318 int nByte; /* See above */
319 char *zBuf; /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
320 AsyncWrite *pNext; /* Next write operation (to any file) */
321};
322
323/*
danielk19770e87b702007-08-25 12:29:30 +0000324** The AsyncFile structure is a subclass of sqlite3_file used for
325** asynchronous IO.
326**
327** All of the actual data for the structure is stored in the structure
328** pointed to by AsyncFile.pData, which is allocated as part of the
329** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
330** lifetime of the AsyncFile structure is ended by the caller after OsClose()
331** is called, but the data in AsyncFileData may be required by the
332** writer thread after that point.
drh23669402006-01-09 17:29:52 +0000333*/
334struct AsyncFile {
danielk19770e87b702007-08-25 12:29:30 +0000335 sqlite3_io_methods *pMethod;
336 AsyncFileData *pData;
337};
338struct AsyncFileData {
339 char *zName; /* Underlying OS filename - used for debugging */
340 int nName; /* Number of characters in zName */
341 sqlite3_file *pBaseRead; /* Read handle to the underlying Os file */
342 sqlite3_file *pBaseWrite; /* Write handle to the underlying Os file */
drh23669402006-01-09 17:29:52 +0000343};
344
345/*
346** Add an entry to the end of the global write-op list. pWrite should point
drh17435752007-08-16 04:30:38 +0000347** to an AsyncWrite structure allocated using sqlite3_malloc(). The writer
348** thread will call sqlite3_free() to free the structure after the specified
drhfe0f75b2006-01-10 20:01:18 +0000349** operation has been completed.
drh23669402006-01-09 17:29:52 +0000350**
drhfe0f75b2006-01-10 20:01:18 +0000351** Once an AsyncWrite structure has been added to the list, it becomes the
352** property of the writer thread and must not be read or modified by the
353** caller.
drh23669402006-01-09 17:29:52 +0000354*/
355static void addAsyncWrite(AsyncWrite *pWrite){
drhfe0f75b2006-01-10 20:01:18 +0000356 /* We must hold the queue mutex in order to modify the queue pointers */
drh23669402006-01-09 17:29:52 +0000357 pthread_mutex_lock(&async.queueMutex);
drhfe0f75b2006-01-10 20:01:18 +0000358
359 /* Add the record to the end of the write-op queue */
drh23669402006-01-09 17:29:52 +0000360 assert( !pWrite->pNext );
361 if( async.pQueueLast ){
362 assert( async.pQueueFirst );
363 async.pQueueLast->pNext = pWrite;
364 }else{
365 async.pQueueFirst = pWrite;
366 }
367 async.pQueueLast = pWrite;
drh4f0c5872007-03-26 22:05:01 +0000368 ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
danielk19770e87b702007-08-25 12:29:30 +0000369 pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
drhfe0f75b2006-01-10 20:01:18 +0000370
danielk1977be29bfc2006-02-14 13:25:43 +0000371 if( pWrite->op==ASYNC_CLOSE ){
372 async.nFile--;
373 if( async.nFile==0 ){
374 async.ioError = SQLITE_OK;
375 }
376 }
377
drhfe0f75b2006-01-10 20:01:18 +0000378 /* Drop the queue mutex */
drh23669402006-01-09 17:29:52 +0000379 pthread_mutex_unlock(&async.queueMutex);
drhfe0f75b2006-01-10 20:01:18 +0000380
381 /* The writer thread might have been idle because there was nothing
382 ** on the write-op queue for it to do. So wake it up. */
drh23669402006-01-09 17:29:52 +0000383 pthread_cond_signal(&async.queueSignal);
384}
385
386/*
danielk1977be29bfc2006-02-14 13:25:43 +0000387** Increment async.nFile in a thread-safe manner.
388*/
389static void incrOpenFileCount(){
390 /* We must hold the queue mutex in order to modify async.nFile */
391 pthread_mutex_lock(&async.queueMutex);
392 if( async.nFile==0 ){
393 async.ioError = SQLITE_OK;
394 }
395 async.nFile++;
396 pthread_mutex_unlock(&async.queueMutex);
397}
398
399/*
drh23669402006-01-09 17:29:52 +0000400** This is a utility function to allocate and populate a new AsyncWrite
401** structure and insert it (via addAsyncWrite() ) into the global list.
402*/
403static int addNewAsyncWrite(
danielk19770e87b702007-08-25 12:29:30 +0000404 AsyncFileData *pFileData,
drh23669402006-01-09 17:29:52 +0000405 int op,
406 i64 iOffset,
407 int nByte,
408 const char *zByte
409){
drh4b74b262006-02-13 13:50:55 +0000410 AsyncWrite *p;
danielk1977be29bfc2006-02-14 13:25:43 +0000411 if( op!=ASYNC_CLOSE && async.ioError ){
412 return async.ioError;
drh4b74b262006-02-13 13:50:55 +0000413 }
drh17435752007-08-16 04:30:38 +0000414 p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
drh23669402006-01-09 17:29:52 +0000415 if( !p ){
416 return SQLITE_NOMEM;
417 }
418 p->op = op;
419 p->iOffset = iOffset;
420 p->nByte = nByte;
danielk19770e87b702007-08-25 12:29:30 +0000421 p->pFileData = pFileData;
drh23669402006-01-09 17:29:52 +0000422 p->pNext = 0;
423 if( zByte ){
424 p->zBuf = (char *)&p[1];
425 memcpy(p->zBuf, zByte, nByte);
426 }else{
427 p->zBuf = 0;
428 }
429 addAsyncWrite(p);
430 return SQLITE_OK;
431}
432
433/*
434** Close the file. This just adds an entry to the write-op list, the file is
435** not actually closed.
436*/
danielk19770e87b702007-08-25 12:29:30 +0000437static int asyncClose(sqlite3_file *pFile){
438 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
439 return addNewAsyncWrite(p, ASYNC_CLOSE, 0, 0, 0);
drh23669402006-01-09 17:29:52 +0000440}
441
442/*
443** Implementation of sqlite3OsWrite() for asynchronous files. Instead of
444** writing to the underlying file, this function adds an entry to the end of
445** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be
446** returned.
447*/
danielk19770e87b702007-08-25 12:29:30 +0000448static int asyncWrite(sqlite3_file *pFile, const void *pBuf, int amt, i64 iOff){
449 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
450 return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
drh23669402006-01-09 17:29:52 +0000451}
452
453/*
454** Read data from the file. First we read from the filesystem, then adjust
455** the contents of the buffer based on ASYNC_WRITE operations in the
drhfe0f75b2006-01-10 20:01:18 +0000456** write-op queue.
drh23669402006-01-09 17:29:52 +0000457**
458** This method holds the mutex from start to finish.
459*/
danielk19770e87b702007-08-25 12:29:30 +0000460static int asyncRead(sqlite3_file *pFile, void *zOut, int iAmt, i64 iOffset){
461 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
drh23669402006-01-09 17:29:52 +0000462 int rc = SQLITE_OK;
463 i64 filesize;
464 int nRead;
danielk19770e87b702007-08-25 12:29:30 +0000465 sqlite3_file *pBase = p->pBaseRead;
danielk1977750b03e2006-02-14 10:48:39 +0000466
danielk19770e87b702007-08-25 12:29:30 +0000467 /* If an I/O error has previously occurred in this virtual file
468 ** system, then all subsequent operations fail.
drh4b74b262006-02-13 13:50:55 +0000469 */
danielk1977be29bfc2006-02-14 13:25:43 +0000470 if( async.ioError!=SQLITE_OK ){
471 return async.ioError;
drh4b74b262006-02-13 13:50:55 +0000472 }
473
drh23669402006-01-09 17:29:52 +0000474 /* Grab the write queue mutex for the duration of the call */
475 pthread_mutex_lock(&async.queueMutex);
476
danielk19770e87b702007-08-25 12:29:30 +0000477 if( pBase->pMethods ){
danielk1977750b03e2006-02-14 10:48:39 +0000478 rc = sqlite3OsFileSize(pBase, &filesize);
drh23669402006-01-09 17:29:52 +0000479 if( rc!=SQLITE_OK ){
480 goto asyncread_out;
481 }
danielk19770e87b702007-08-25 12:29:30 +0000482 nRead = MIN(filesize - iOffset, iAmt);
drh23669402006-01-09 17:29:52 +0000483 if( nRead>0 ){
danielk19770e87b702007-08-25 12:29:30 +0000484 rc = sqlite3OsRead(pBase, zOut, nRead, iOffset);
485 ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
drh23669402006-01-09 17:29:52 +0000486 }
487 }
488
489 if( rc==SQLITE_OK ){
danielk19770e87b702007-08-25 12:29:30 +0000490 AsyncWrite *pWrite;
drh23669402006-01-09 17:29:52 +0000491
danielk19770e87b702007-08-25 12:29:30 +0000492 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
493 if( pWrite->pFileData==p && pWrite->op==ASYNC_WRITE ){
494 int iBeginOut = (pWrite->iOffset-iOffset);
drh44528382006-02-13 13:30:19 +0000495 int iBeginIn = -iBeginOut;
drh23669402006-01-09 17:29:52 +0000496 int nCopy;
497
498 if( iBeginIn<0 ) iBeginIn = 0;
499 if( iBeginOut<0 ) iBeginOut = 0;
danielk19770e87b702007-08-25 12:29:30 +0000500 nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut);
drh23669402006-01-09 17:29:52 +0000501
502 if( nCopy>0 ){
danielk19770e87b702007-08-25 12:29:30 +0000503 memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy);
drh4f0c5872007-03-26 22:05:01 +0000504 ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
drh23669402006-01-09 17:29:52 +0000505 }
506 }
507 }
drh23669402006-01-09 17:29:52 +0000508 }
509
510asyncread_out:
511 pthread_mutex_unlock(&async.queueMutex);
512 return rc;
513}
514
515/*
danielk19770e87b702007-08-25 12:29:30 +0000516** Truncate the file to nByte bytes in length. This just adds an entry to
517** the write-op list, no IO actually takes place.
drh23669402006-01-09 17:29:52 +0000518*/
danielk19770e87b702007-08-25 12:29:30 +0000519static int asyncTruncate(sqlite3_file *pFile, i64 nByte){
520 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
521 return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
522}
523
524/*
525** Sync the file. This just adds an entry to the write-op list, the
526** sync() is done later by sqlite3_async_flush().
527*/
528static int asyncSync(sqlite3_file *pFile, int flags){
529 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
530 return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
drh23669402006-01-09 17:29:52 +0000531}
532
533/*
534** Read the size of the file. First we read the size of the file system
535** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
536** currently in the write-op list.
537**
538** This method holds the mutex from start to finish.
539*/
danielk19770e87b702007-08-25 12:29:30 +0000540int asyncFileSize(sqlite3_file *pFile, i64 *piSize){
541 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
drh23669402006-01-09 17:29:52 +0000542 int rc = SQLITE_OK;
543 i64 s = 0;
danielk19770e87b702007-08-25 12:29:30 +0000544 sqlite3_file *pBase;
drh23669402006-01-09 17:29:52 +0000545
546 pthread_mutex_lock(&async.queueMutex);
547
548 /* Read the filesystem size from the base file. If pBaseRead is NULL, this
549 ** means the file hasn't been opened yet. In this case all relevant data
550 ** must be in the write-op queue anyway, so we can omit reading from the
551 ** file-system.
552 */
danielk19770e87b702007-08-25 12:29:30 +0000553 pBase = p->pBaseRead;
554 if( pBase->pMethods ){
drh23669402006-01-09 17:29:52 +0000555 rc = sqlite3OsFileSize(pBase, &s);
556 }
557
558 if( rc==SQLITE_OK ){
danielk19770e87b702007-08-25 12:29:30 +0000559 AsyncWrite *pWrite;
560 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
561 if( pWrite->pFileData==p ){
562 switch( pWrite->op ){
drh23669402006-01-09 17:29:52 +0000563 case ASYNC_WRITE:
danielk19770e87b702007-08-25 12:29:30 +0000564 s = MAX(pWrite->iOffset + (i64)(pWrite->nByte), s);
drh23669402006-01-09 17:29:52 +0000565 break;
566 case ASYNC_TRUNCATE:
danielk19770e87b702007-08-25 12:29:30 +0000567 s = MIN(s, pWrite->iOffset);
drh23669402006-01-09 17:29:52 +0000568 break;
569 }
570 }
571 }
danielk19770e87b702007-08-25 12:29:30 +0000572 *piSize = s;
drh23669402006-01-09 17:29:52 +0000573 }
574 pthread_mutex_unlock(&async.queueMutex);
575 return rc;
576}
577
578/*
drh89ea9312006-02-13 17:03:47 +0000579** No disk locking is performed. We keep track of locks locally in
580** the async.aLock hash table. Locking should appear to work the same
581** as with standard (unmodified) SQLite as long as all connections
582** come from this one process. Connections from external processes
583** cannot see our internal hash table (obviously) and will thus not
584** honor our locks.
drhfe0f75b2006-01-10 20:01:18 +0000585*/
danielk19770e87b702007-08-25 12:29:30 +0000586static int asyncLock(sqlite3_file *pFile, int lockType){
587 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
588 ASYNC_TRACE(("LOCK %d (%s)\n", lockType, p->zName));
drh89ea9312006-02-13 17:03:47 +0000589 pthread_mutex_lock(&async.lockMutex);
danielk19770e87b702007-08-25 12:29:30 +0000590 sqlite3HashInsert(&async.aLock, p->zName, p->nName, (void*)lockType);
drh89ea9312006-02-13 17:03:47 +0000591 pthread_mutex_unlock(&async.lockMutex);
drh23669402006-01-09 17:29:52 +0000592 return SQLITE_OK;
593}
danielk19770e87b702007-08-25 12:29:30 +0000594static int asyncUnlock(sqlite3_file *pFile, int lockType){
595 return asyncLock(pFile, lockType);
drh23669402006-01-09 17:29:52 +0000596}
597
598/*
599** This function is called when the pager layer first opens a database file
600** and is checking for a hot-journal.
601*/
danielk19770e87b702007-08-25 12:29:30 +0000602static int asyncCheckReservedLock(sqlite3_file *pFile){
603 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
drh89ea9312006-02-13 17:03:47 +0000604 int rc;
605 pthread_mutex_lock(&async.lockMutex);
danielk19770e87b702007-08-25 12:29:30 +0000606 rc = (int)sqlite3HashFind(&async.aLock, p->zName, p->nName);
drh89ea9312006-02-13 17:03:47 +0000607 pthread_mutex_unlock(&async.lockMutex);
danielk19770e87b702007-08-25 12:29:30 +0000608 ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", rc, p->zName));
drh97bbdc02006-02-13 18:35:06 +0000609 return rc>SHARED_LOCK;
drh23669402006-01-09 17:29:52 +0000610}
611
danielk19770e87b702007-08-25 12:29:30 +0000612/*
613** This is a no-op, as the asynchronous backend does not support locking.
614*/
615static int asyncBreakLock(sqlite3_file *id){
616 return SQLITE_OK;
danielk1977b4721172007-03-19 05:54:48 +0000617}
618
drh23669402006-01-09 17:29:52 +0000619/*
620** This is broken. But sqlite3OsLockState() is only used for testing anyway.
621*/
danielk19770e87b702007-08-25 12:29:30 +0000622static int asyncLockState(sqlite3_file *id){
drh23669402006-01-09 17:29:52 +0000623 return SQLITE_OK;
624}
625
danielk19770e87b702007-08-25 12:29:30 +0000626/*
627** Return the device characteristics and sector-size of the device. It
628** is not tricky to implement these correctly, as this backend might
629** not have an open file handle at this point.
drh23669402006-01-09 17:29:52 +0000630*/
danielk19770e87b702007-08-25 12:29:30 +0000631static int asyncSectorSize(sqlite3_file *pFile){
632 return 512;
633}
634static int asyncDeviceCharacteristics(sqlite3_file *pFile){
635 return 0;
636}
drh23669402006-01-09 17:29:52 +0000637
drhfe0f75b2006-01-10 20:01:18 +0000638/*
danielk19770e87b702007-08-25 12:29:30 +0000639** Open a file.
drhfe0f75b2006-01-10 20:01:18 +0000640*/
danielk19770e87b702007-08-25 12:29:30 +0000641static int asyncOpen(
642 sqlite3_vfs *pAsyncVfs,
643 const char *zName,
644 sqlite3_file *pFile,
645 int flags,
646 int *pOutFlags
drh23669402006-01-09 17:29:52 +0000647){
danielk19770e87b702007-08-25 12:29:30 +0000648 static sqlite3_io_methods async_methods = {
649 1, /* iVersion */
650 asyncClose, /* xClose */
651 asyncRead, /* xRead */
652 asyncWrite, /* xWrite */
653 asyncTruncate, /* xTruncate */
654 asyncSync, /* xSync */
655 asyncFileSize, /* xFileSize */
656 asyncLock, /* xLock */
657 asyncUnlock, /* xUnlock */
658 asyncCheckReservedLock, /* xCheckReservedLock */
659 asyncBreakLock, /* xBreakLock */
660 asyncLockState, /* xLockState */
661 asyncSectorSize, /* xSectorSize */
662 asyncDeviceCharacteristics /* xDeviceCharacteristics */
drh23669402006-01-09 17:29:52 +0000663 };
664
danielk19770e87b702007-08-25 12:29:30 +0000665 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
666 AsyncFile *p = (AsyncFile *)pFile;
667 int nName = strlen(zName);;
668 int rc;
669 int nByte;
670 AsyncFileData *pData;
671
672 nByte = (
673 sizeof(AsyncFileData) + /* AsyncFileData structure */
674 2 * pVfs->szOsFile + /* AsyncFileData.zName */
675 nName + 1 /* AsyncFileData.pBaseRead and pBaseWrite */
676 );
677 pData = sqlite3_malloc(nByte);
678 if( !pData ){
679 return SQLITE_NOMEM;
680 }
681 memset(pData, 0, nByte);
682 pData->zName = (char *)&pData[1];
683 pData->nName = nName;
684 pData->pBaseRead = (sqlite3_file *)&pData->zName[nName+1];
685 pData->pBaseWrite = (sqlite3_file *)&pData->zName[nName+1+pVfs->szOsFile];
686 memcpy(pData->zName, zName, nName+1);
687
688 if( flags&SQLITE_OPEN_EXCLUSIVE ){
689 rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (i64)flags, 0, 0);
690 if( pOutFlags ) *pOutFlags = flags;
691 }else{
692 rc = sqlite3OsOpen(pVfs, zName, pData->pBaseRead, flags, pOutFlags);
693 if( rc==SQLITE_OK && ((*pOutFlags)&SQLITE_OPEN_READWRITE) ){
694 rc = sqlite3OsOpen(pVfs, zName, pData->pBaseWrite, flags, 0);
drh23669402006-01-09 17:29:52 +0000695 }
696 }
697
danielk19770e87b702007-08-25 12:29:30 +0000698 if( rc==SQLITE_OK ){
699 p->pMethod = &async_methods;
700 p->pData = pData;
701 incrOpenFileCount();
702 }else{
703 sqlite3OsClose(pData->pBaseRead);
704 sqlite3OsClose(pData->pBaseWrite);
705 sqlite3_free(pData);
drh23669402006-01-09 17:29:52 +0000706 }
drh23669402006-01-09 17:29:52 +0000707
drh23669402006-01-09 17:29:52 +0000708 return rc;
709}
710
711/*
712** Implementation of sqlite3OsDelete. Add an entry to the end of the
713** write-op queue to perform the delete.
714*/
danielk19770e87b702007-08-25 12:29:30 +0000715static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
716 return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z);
drh23669402006-01-09 17:29:52 +0000717}
718
719/*
danielk19770e87b702007-08-25 12:29:30 +0000720** Implementation of sqlite3OsAccess. This method holds the mutex from
721** start to finish.
drh23669402006-01-09 17:29:52 +0000722*/
danielk19770e87b702007-08-25 12:29:30 +0000723static int asyncAccess(sqlite3_vfs *pAsyncVfs, const char *zName, int flags){
drh23669402006-01-09 17:29:52 +0000724 int ret;
725 AsyncWrite *p;
danielk19770e87b702007-08-25 12:29:30 +0000726 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
727
728 assert(flags==SQLITE_ACCESS_READWRITE
drh50d3f902007-08-27 21:10:36 +0000729 || flags==SQLITE_ACCESS_READ
danielk19770e87b702007-08-25 12:29:30 +0000730 || flags==SQLITE_ACCESS_EXISTS
731 );
drh23669402006-01-09 17:29:52 +0000732
733 pthread_mutex_lock(&async.queueMutex);
danielk19770e87b702007-08-25 12:29:30 +0000734 ret = sqlite3OsAccess(pVfs, zName, flags);
735 if( flags==SQLITE_ACCESS_EXISTS ){
736 for(p=async.pQueueFirst; p; p = p->pNext){
737 if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
738 ret = 0;
739 }else if( p->op==ASYNC_OPENEXCLUSIVE
740 && 0==strcmp(p->pFileData->zName, zName)
741 ){
742 ret = 1;
743 }
drh23669402006-01-09 17:29:52 +0000744 }
745 }
danielk19770e87b702007-08-25 12:29:30 +0000746 ASYNC_TRACE(("ACCESS(%s): %s = %d\n",
747 flags==SQLITE_ACCESS_READWRITE?"read-write":
drh50d3f902007-08-27 21:10:36 +0000748 flags==SQLITE_ACCESS_READ?"read":"exists"
danielk19770e87b702007-08-25 12:29:30 +0000749 , zName, ret)
750 );
drh23669402006-01-09 17:29:52 +0000751 pthread_mutex_unlock(&async.queueMutex);
752 return ret;
753}
754
danielk19770e87b702007-08-25 12:29:30 +0000755static int asyncGetTempName(sqlite3_vfs *pAsyncVfs, char *zBufOut){
756 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
757 return pVfs->xGetTempName(pVfs, zBufOut);
758}
759static int asyncFullPathname(
760 sqlite3_vfs *pAsyncVfs,
761 const char *zPath,
762 char *zPathOut
763){
764 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
765 return sqlite3OsFullPathname(pVfs, zPath, zPathOut);
766}
767static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
768 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
769 return pVfs->xDlOpen(pVfs, zPath);
770}
771static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
772 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
773 pVfs->xDlError(pVfs, nByte, zErrMsg);
774}
775static void *asyncDlSym(
776 sqlite3_vfs *pAsyncVfs,
777 void *pHandle,
778 const char *zSymbol
779){
780 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
781 return pVfs->xDlSym(pVfs, pHandle, zSymbol);
782}
783static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
784 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
785 pVfs->xDlClose(pVfs, pHandle);
786}
787static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
788 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
789 return pVfs->xRandomness(pVfs, nByte, zBufOut);
790}
791static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
792 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
793 return pVfs->xSleep(pVfs, nMicro);
794}
795static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
796 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
797 return pVfs->xCurrentTime(pVfs, pTimeOut);
798}
799
800static sqlite3_vfs async_vfs = {
801 1, /* iVersion */
802 sizeof(AsyncFile), /* szOsFile */
803 0, /* mxPathname */
804 0, /* pNext */
805 "async", /* zName */
806 0, /* pAppData */
807 asyncOpen, /* xOpen */
808 asyncDelete, /* xDelete */
809 asyncAccess, /* xAccess */
810 asyncGetTempName, /* xGetTempName */
811 asyncFullPathname, /* xFullPathname */
812 asyncDlOpen, /* xDlOpen */
813 asyncDlError, /* xDlError */
814 asyncDlSym, /* xDlSym */
815 asyncDlClose, /* xDlClose */
816 asyncRandomness, /* xDlError */
817 asyncSleep, /* xDlSym */
818 asyncCurrentTime /* xDlClose */
819};
820
drh23669402006-01-09 17:29:52 +0000821/*
822** Call this routine to enable or disable the
823** asynchronous IO features implemented in this file.
824**
825** This routine is not even remotely threadsafe. Do not call
826** this routine while any SQLite database connections are open.
827*/
828static void asyncEnable(int enable){
danielk19770e87b702007-08-25 12:29:30 +0000829 if( enable ){
830 if( !async_vfs.pAppData ){
831 async_vfs.pAppData = (void *)sqlite3_vfs_find(0);
832 async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
833 sqlite3_vfs_register(&async_vfs, 1);
834 sqlite3HashInit(&async.aLock, SQLITE_HASH_BINARY, 1);
835 }
836 }else{
837 if( async_vfs.pAppData ){
838 sqlite3_vfs_unregister(&async_vfs);
839 async_vfs.pAppData = 0;
840 sqlite3HashClear(&async.aLock);
841 }
drh23669402006-01-09 17:29:52 +0000842 }
843}
844
845/*
846** This procedure runs in a separate thread, reading messages off of the
847** write queue and processing them one by one.
848**
849** If async.writerHaltNow is true, then this procedure exits
850** after processing a single message.
851**
852** If async.writerHaltWhenIdle is true, then this procedure exits when
853** the write queue is empty.
854**
855** If both of the above variables are false, this procedure runs
856** indefinately, waiting for operations to be added to the write queue
857** and processing them in the order in which they arrive.
858**
859** An artifical delay of async.ioDelay milliseconds is inserted before
860** each write operation in order to simulate the effect of a slow disk.
861**
862** Only one instance of this procedure may be running at a time.
863*/
864static void *asyncWriterThread(void *NotUsed){
danielk19770e87b702007-08-25 12:29:30 +0000865 sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
drh23669402006-01-09 17:29:52 +0000866 AsyncWrite *p = 0;
867 int rc = SQLITE_OK;
danielk1977be29bfc2006-02-14 13:25:43 +0000868 int holdingMutex = 0;
drh23669402006-01-09 17:29:52 +0000869
870 if( pthread_mutex_trylock(&async.writerMutex) ){
871 return 0;
872 }
873 while( async.writerHaltNow==0 ){
danielk19770e87b702007-08-25 12:29:30 +0000874 sqlite3_file *pBase = 0;
drh23669402006-01-09 17:29:52 +0000875
danielk1977be29bfc2006-02-14 13:25:43 +0000876 if( !holdingMutex ){
877 pthread_mutex_lock(&async.queueMutex);
878 }
drh23669402006-01-09 17:29:52 +0000879 while( (p = async.pQueueFirst)==0 ){
880 pthread_cond_broadcast(&async.emptySignal);
881 if( async.writerHaltWhenIdle ){
882 pthread_mutex_unlock(&async.queueMutex);
883 break;
884 }else{
drh4f0c5872007-03-26 22:05:01 +0000885 ASYNC_TRACE(("IDLE\n"));
drh23669402006-01-09 17:29:52 +0000886 pthread_cond_wait(&async.queueSignal, &async.queueMutex);
drh4f0c5872007-03-26 22:05:01 +0000887 ASYNC_TRACE(("WAKEUP\n"));
drh23669402006-01-09 17:29:52 +0000888 }
889 }
890 if( p==0 ) break;
danielk1977be29bfc2006-02-14 13:25:43 +0000891 holdingMutex = 1;
drh23669402006-01-09 17:29:52 +0000892
893 /* Right now this thread is holding the mutex on the write-op queue.
894 ** Variable 'p' points to the first entry in the write-op queue. In
895 ** the general case, we hold on to the mutex for the entire body of
896 ** the loop.
897 **
898 ** However in the cases enumerated below, we relinquish the mutex,
899 ** perform the IO, and then re-request the mutex before removing 'p' from
900 ** the head of the write-op queue. The idea is to increase concurrency with
901 ** sqlite threads.
902 **
903 ** * An ASYNC_CLOSE operation.
904 ** * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish
905 ** the mutex, call the underlying xOpenExclusive() function, then
906 ** re-aquire the mutex before seting the AsyncFile.pBaseRead
907 ** variable.
908 ** * ASYNC_SYNC and ASYNC_WRITE operations, if
909 ** SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
910 ** file-handles are open for the particular file being "synced".
911 */
danielk1977be29bfc2006-02-14 13:25:43 +0000912 if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
913 p->op = ASYNC_NOOP;
914 }
danielk19770e87b702007-08-25 12:29:30 +0000915 if( p->pFileData ){
916 pBase = p->pFileData->pBaseWrite;
drh23669402006-01-09 17:29:52 +0000917 if(
918 p->op==ASYNC_CLOSE ||
919 p->op==ASYNC_OPENEXCLUSIVE ||
danielk19770e87b702007-08-25 12:29:30 +0000920 (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) )
drh23669402006-01-09 17:29:52 +0000921 ){
922 pthread_mutex_unlock(&async.queueMutex);
923 holdingMutex = 0;
924 }
danielk19770e87b702007-08-25 12:29:30 +0000925 if( !pBase->pMethods ){
926 pBase = p->pFileData->pBaseRead;
drh23669402006-01-09 17:29:52 +0000927 }
928 }
929
930 switch( p->op ){
drh4b74b262006-02-13 13:50:55 +0000931 case ASYNC_NOOP:
932 break;
933
drh23669402006-01-09 17:29:52 +0000934 case ASYNC_WRITE:
935 assert( pBase );
drh4f0c5872007-03-26 22:05:01 +0000936 ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
danielk19770e87b702007-08-25 12:29:30 +0000937 p->pFileData->zName, p->nByte, p->iOffset));
938 rc = sqlite3OsWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
drh23669402006-01-09 17:29:52 +0000939 break;
940
941 case ASYNC_SYNC:
942 assert( pBase );
danielk19770e87b702007-08-25 12:29:30 +0000943 ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
drh23669402006-01-09 17:29:52 +0000944 rc = sqlite3OsSync(pBase, p->nByte);
945 break;
946
947 case ASYNC_TRUNCATE:
948 assert( pBase );
danielk19770e87b702007-08-25 12:29:30 +0000949 ASYNC_TRACE(("TRUNCATE %s to %d bytes\n",
950 p->pFileData->zName, p->iOffset));
drh97bbdc02006-02-13 18:35:06 +0000951 rc = sqlite3OsTruncate(pBase, p->iOffset);
drh23669402006-01-09 17:29:52 +0000952 break;
953
954 case ASYNC_CLOSE:
danielk19770e87b702007-08-25 12:29:30 +0000955 ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
956 sqlite3OsClose(p->pFileData->pBaseWrite);
957 sqlite3OsClose(p->pFileData->pBaseRead);
958 sqlite3_free(p->pFileData);
drh23669402006-01-09 17:29:52 +0000959 break;
960
961 case ASYNC_DELETE:
drh4f0c5872007-03-26 22:05:01 +0000962 ASYNC_TRACE(("DELETE %s\n", p->zBuf));
danielk19770e87b702007-08-25 12:29:30 +0000963 rc = sqlite3OsDelete(pVfs, p->zBuf, (int)p->iOffset);
drh23669402006-01-09 17:29:52 +0000964 break;
965
966 case ASYNC_OPENEXCLUSIVE: {
danielk19770e87b702007-08-25 12:29:30 +0000967 int flags = (int)p->iOffset;
968 AsyncFileData *pData = p->pFileData;
969 ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
970 assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
971 rc = sqlite3OsOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
drh23669402006-01-09 17:29:52 +0000972 assert( holdingMutex==0 );
973 pthread_mutex_lock(&async.queueMutex);
974 holdingMutex = 1;
drh23669402006-01-09 17:29:52 +0000975 break;
976 }
977
978 default: assert(!"Illegal value for AsyncWrite.op");
979 }
980
981 /* If we didn't hang on to the mutex during the IO op, obtain it now
982 ** so that the AsyncWrite structure can be safely removed from the
983 ** global write-op queue.
984 */
985 if( !holdingMutex ){
986 pthread_mutex_lock(&async.queueMutex);
987 holdingMutex = 1;
988 }
drh4f0c5872007-03-26 22:05:01 +0000989 /* ASYNC_TRACE(("UNLINK %p\n", p)); */
drh4b74b262006-02-13 13:50:55 +0000990 if( p==async.pQueueLast ){
991 async.pQueueLast = 0;
drh23669402006-01-09 17:29:52 +0000992 }
drh4b74b262006-02-13 13:50:55 +0000993 async.pQueueFirst = p->pNext;
drh17435752007-08-16 04:30:38 +0000994 sqlite3_free(p);
drh23669402006-01-09 17:29:52 +0000995 assert( holdingMutex );
996
danielk1977be29bfc2006-02-14 13:25:43 +0000997 /* An IO error has occured. We cannot report the error back to the
998 ** connection that requested the I/O since the error happened
999 ** asynchronously. The connection has already moved on. There
1000 ** really is nobody to report the error to.
1001 **
1002 ** The file for which the error occured may have been a database or
1003 ** journal file. Regardless, none of the currently queued operations
1004 ** associated with the same database should now be performed. Nor should
1005 ** any subsequently requested IO on either a database or journal file
1006 ** handle for the same database be accepted until the main database
1007 ** file handle has been closed and reopened.
1008 **
1009 ** Furthermore, no further IO should be queued or performed on any file
1010 ** handle associated with a database that may have been part of a
1011 ** multi-file transaction that included the database associated with
1012 ** the IO error (i.e. a database ATTACHed to the same handle at some
1013 ** point in time).
1014 */
1015 if( rc!=SQLITE_OK ){
1016 async.ioError = rc;
1017 }
1018
drh23669402006-01-09 17:29:52 +00001019 /* Drop the queue mutex before continuing to the next write operation
1020 ** in order to give other threads a chance to work with the write queue.
1021 */
danielk1977be29bfc2006-02-14 13:25:43 +00001022 if( !async.pQueueFirst || !async.ioError ){
danielk19772d9fcaa2006-02-14 14:02:08 +00001023 sqlite3ApiExit(0, 0);
danielk1977be29bfc2006-02-14 13:25:43 +00001024 pthread_mutex_unlock(&async.queueMutex);
1025 holdingMutex = 0;
1026 if( async.ioDelay>0 ){
danielk19770e87b702007-08-25 12:29:30 +00001027 sqlite3OsSleep(pVfs, async.ioDelay);
danielk1977be29bfc2006-02-14 13:25:43 +00001028 }else{
1029 sched_yield();
1030 }
drh23669402006-01-09 17:29:52 +00001031 }
1032 }
danielk1977be29bfc2006-02-14 13:25:43 +00001033
drh23669402006-01-09 17:29:52 +00001034 pthread_mutex_unlock(&async.writerMutex);
1035 return 0;
1036}
1037
1038/**************************************************************************
1039** The remaining code defines a Tcl interface for testing the asynchronous
1040** IO implementation in this file.
1041**
1042** To adapt the code to a non-TCL environment, delete or comment out
1043** the code that follows.
1044*/
1045
1046/*
1047** sqlite3async_enable ?YES/NO?
1048**
1049** Enable or disable the asynchronous I/O backend. This command is
1050** not thread-safe. Do not call it while any database connections
1051** are open.
1052*/
1053static int testAsyncEnable(
1054 void * clientData,
1055 Tcl_Interp *interp,
1056 int objc,
1057 Tcl_Obj *CONST objv[]
1058){
1059 if( objc!=1 && objc!=2 ){
1060 Tcl_WrongNumArgs(interp, 1, objv, "?YES/NO?");
1061 return TCL_ERROR;
1062 }
1063 if( objc==1 ){
danielk19770e87b702007-08-25 12:29:30 +00001064 Tcl_SetObjResult(interp, Tcl_NewBooleanObj(async_vfs.pAppData!=0));
drh23669402006-01-09 17:29:52 +00001065 }else{
1066 int en;
1067 if( Tcl_GetBooleanFromObj(interp, objv[1], &en) ) return TCL_ERROR;
1068 asyncEnable(en);
1069 }
1070 return TCL_OK;
1071}
1072
1073/*
1074** sqlite3async_halt "now"|"idle"|"never"
1075**
1076** Set the conditions at which the writer thread will halt.
1077*/
1078static int testAsyncHalt(
1079 void * clientData,
1080 Tcl_Interp *interp,
1081 int objc,
1082 Tcl_Obj *CONST objv[]
1083){
1084 const char *zCond;
1085 if( objc!=2 ){
1086 Tcl_WrongNumArgs(interp, 1, objv, "\"now\"|\"idle\"|\"never\"");
1087 return TCL_ERROR;
1088 }
1089 zCond = Tcl_GetString(objv[1]);
1090 if( strcmp(zCond, "now")==0 ){
1091 async.writerHaltNow = 1;
1092 pthread_cond_broadcast(&async.queueSignal);
1093 }else if( strcmp(zCond, "idle")==0 ){
1094 async.writerHaltWhenIdle = 1;
1095 async.writerHaltNow = 0;
1096 pthread_cond_broadcast(&async.queueSignal);
1097 }else if( strcmp(zCond, "never")==0 ){
1098 async.writerHaltWhenIdle = 0;
1099 async.writerHaltNow = 0;
1100 }else{
1101 Tcl_AppendResult(interp,
1102 "should be one of: \"now\", \"idle\", or \"never\"", (char*)0);
1103 return TCL_ERROR;
1104 }
1105 return TCL_OK;
1106}
1107
1108/*
1109** sqlite3async_delay ?MS?
1110**
1111** Query or set the number of milliseconds of delay in the writer
1112** thread after each write operation. The default is 0. By increasing
1113** the memory delay we can simulate the effect of slow disk I/O.
1114*/
1115static int testAsyncDelay(
1116 void * clientData,
1117 Tcl_Interp *interp,
1118 int objc,
1119 Tcl_Obj *CONST objv[]
1120){
1121 if( objc!=1 && objc!=2 ){
1122 Tcl_WrongNumArgs(interp, 1, objv, "?MS?");
1123 return TCL_ERROR;
1124 }
1125 if( objc==1 ){
1126 Tcl_SetObjResult(interp, Tcl_NewIntObj(async.ioDelay));
1127 }else{
1128 int ioDelay;
1129 if( Tcl_GetIntFromObj(interp, objv[1], &ioDelay) ) return TCL_ERROR;
1130 async.ioDelay = ioDelay;
1131 }
1132 return TCL_OK;
1133}
1134
1135/*
1136** sqlite3async_start
1137**
1138** Start a new writer thread.
1139*/
1140static int testAsyncStart(
1141 void * clientData,
1142 Tcl_Interp *interp,
1143 int objc,
1144 Tcl_Obj *CONST objv[]
1145){
1146 pthread_t x;
1147 int rc;
1148 rc = pthread_create(&x, 0, asyncWriterThread, 0);
1149 if( rc ){
1150 Tcl_AppendResult(interp, "failed to create the thread", 0);
1151 return TCL_ERROR;
1152 }
1153 pthread_detach(x);
1154 return TCL_OK;
1155}
1156
1157/*
1158** sqlite3async_wait
1159**
1160** Wait for the current writer thread to terminate.
1161**
1162** If the current writer thread is set to run forever then this
1163** command would block forever. To prevent that, an error is returned.
1164*/
1165static int testAsyncWait(
1166 void * clientData,
1167 Tcl_Interp *interp,
1168 int objc,
1169 Tcl_Obj *CONST objv[]
1170){
drh89ea9312006-02-13 17:03:47 +00001171 int cnt = 10;
drh23669402006-01-09 17:29:52 +00001172 if( async.writerHaltNow==0 && async.writerHaltWhenIdle==0 ){
1173 Tcl_AppendResult(interp, "would block forever", (char*)0);
1174 return TCL_ERROR;
1175 }
danielk1977750b03e2006-02-14 10:48:39 +00001176
drh89ea9312006-02-13 17:03:47 +00001177 while( cnt-- && !pthread_mutex_trylock(&async.writerMutex) ){
1178 pthread_mutex_unlock(&async.writerMutex);
1179 sched_yield();
1180 }
1181 if( cnt>=0 ){
drh4f0c5872007-03-26 22:05:01 +00001182 ASYNC_TRACE(("WAIT\n"));
drh89ea9312006-02-13 17:03:47 +00001183 pthread_mutex_lock(&async.queueMutex);
1184 pthread_cond_broadcast(&async.queueSignal);
1185 pthread_mutex_unlock(&async.queueMutex);
1186 pthread_mutex_lock(&async.writerMutex);
1187 pthread_mutex_unlock(&async.writerMutex);
1188 }else{
drh4f0c5872007-03-26 22:05:01 +00001189 ASYNC_TRACE(("NO-WAIT\n"));
drh89ea9312006-02-13 17:03:47 +00001190 }
drh23669402006-01-09 17:29:52 +00001191 return TCL_OK;
1192}
1193
1194
drhd677b3d2007-08-20 22:48:41 +00001195#endif /* OS_UNIX and SQLITE_THREADSAFE and defined(SQLITE_ENABLE_REDEF_IO) */
drh23669402006-01-09 17:29:52 +00001196
1197/*
1198** This routine registers the custom TCL commands defined in this
1199** module. This should be the only procedure visible from outside
1200** of this module.
1201*/
1202int Sqlitetestasync_Init(Tcl_Interp *interp){
drhd677b3d2007-08-20 22:48:41 +00001203#if OS_UNIX && SQLITE_THREADSAFE && defined(SQLITE_ENABLE_REDEF_IO)
drh23669402006-01-09 17:29:52 +00001204 Tcl_CreateObjCommand(interp,"sqlite3async_enable",testAsyncEnable,0,0);
1205 Tcl_CreateObjCommand(interp,"sqlite3async_halt",testAsyncHalt,0,0);
1206 Tcl_CreateObjCommand(interp,"sqlite3async_delay",testAsyncDelay,0,0);
1207 Tcl_CreateObjCommand(interp,"sqlite3async_start",testAsyncStart,0,0);
1208 Tcl_CreateObjCommand(interp,"sqlite3async_wait",testAsyncWait,0,0);
drh99681db2006-02-13 15:29:32 +00001209 Tcl_LinkVar(interp, "sqlite3async_trace",
1210 (char*)&sqlite3async_trace, TCL_LINK_INT);
drhd677b3d2007-08-20 22:48:41 +00001211#endif /* OS_UNIX and SQLITE_THREADSAFE and defined(SQLITE_ENABLE_REDEF_IO) */
drh23669402006-01-09 17:29:52 +00001212 return TCL_OK;
1213}