blob: 3e7a7b1539e71fe03ad314f3e4faa490b6a5f742 [file] [log] [blame]
drhbbd42a62004-05-22 17:41:58 +00001/*
2** 2004 May 22
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This file contains code that is specific to Unix systems.
14*/
drhbbd42a62004-05-22 17:41:58 +000015#include "sqliteInt.h"
drheb206252004-10-01 02:00:31 +000016#include "os.h"
17#if OS_UNIX /* This file is used on unix only */
drhbbd42a62004-05-22 17:41:58 +000018
19
20#include <time.h>
drh19e2d372005-08-29 23:00:03 +000021#include <sys/time.h>
drhbbd42a62004-05-22 17:41:58 +000022#include <errno.h>
23#include <unistd.h>
drh0ccebe72005-06-07 22:22:50 +000024
25/*
26** Do not include any of the File I/O interface procedures if the
** SQLITE_OMIT_DISKIO macro is defined (indicating that the database
28** will be in-memory only)
29*/
30#ifndef SQLITE_OMIT_DISKIO
31
32
/*
** Supply fallback definitions for open() flags that are missing on
** some systems.  A missing flag is defined as 0 so that OR-ing it into
** the flags argument has no effect.  Compiling with SQLITE_DISABLE_LFS
** forces O_LARGEFILE to 0 even when the system provides it.
*/
#if defined(SQLITE_DISABLE_LFS)
# undef O_LARGEFILE
#endif
#if !defined(O_LARGEFILE)
# define O_LARGEFILE 0
#endif
#if !defined(O_NOFOLLOW)
# define O_NOFOLLOW 0
#endif
#if !defined(O_BINARY)
# define O_BINARY 0
#endif

/*
** The DJGPP compiler environment looks mostly like Unix, but it
** lacks the fcntl() system call.  So redefine fcntl() to be something
** that always succeeds.  This means that locking does not occur under
** DJGPP.
*/
#if defined(__DJGPP__)
# define fcntl(A,B,C) 0
#endif
59
60/*
drhbbd42a62004-05-22 17:41:58 +000061** Include code that is common to all os_*.c files
62*/
63#include "os_common.h"
64
/*
** The threadid macro expands to pthread_self() when SQLITE_UNIX_THREADS
** is defined and to 0 otherwise.  It is intended for testing and
** debugging output only.
*/
#if defined(SQLITE_UNIX_THREADS)
# define threadid pthread_self()
#else
# define threadid 0
#endif
74
/*
** Set or check the OsFile.tid field.  This field is set when an OsFile
** is first opened.  All subsequent uses of the OsFile verify that the
** same thread is operating on the OsFile.  Some operating systems do
** not allow locks to be overridden by other threads and that restriction
** means that sqlite3* database handles cannot be moved from one thread
** to another.  This logic makes sure a user does not try to do that
** by mistake.
**
** SET_THREADID(X) records the calling thread in X->tid.
** CHECK_THREADID(X) evaluates to non-zero when the calling thread is
** NOT the thread recorded in X->tid.
**
** Without SQLITE_UNIX_THREADS, SET_THREADID() expands to nothing and
** CHECK_THREADID() is always 0.
**
** The macro argument and the whole expansion are parenthesized so the
** macros behave correctly for any pointer expression and inside any
** enclosing expression.
*/
#ifdef SQLITE_UNIX_THREADS
# define SET_THREADID(X)   ((X)->tid = pthread_self())
# define CHECK_THREADID(X) (!pthread_equal((X)->tid, pthread_self()))
#else
# define SET_THREADID(X)
# define CHECK_THREADID(X) 0
#endif
91
drhbbd42a62004-05-22 17:41:58 +000092/*
93** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
94** section 6.5.2.2 lines 483 through 490 specify that when a process
95** sets or clears a lock, that operation overrides any prior locks set
96** by the same process. It does not explicitly say so, but this implies
97** that it overrides locks set by the same process using a different
98** file descriptor. Consider this test case:
99**
100** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
101** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
102**
103** Suppose ./file1 and ./file2 are really the same file (because
104** one is a hard or symbolic link to the other) then if you set
105** an exclusive lock on fd1, then try to get an exclusive lock
106** on fd2, it works. I would have expected the second lock to
107** fail since there was already a lock on the file due to fd1.
108** But not so. Since both locks came from the same process, the
109** second overrides the first, even though they were on different
110** file descriptors opened on different file names.
111**
112** Bummer. If you ask me, this is broken. Badly broken. It means
113** that we cannot use POSIX locks to synchronize file access among
114** competing threads of the same process. POSIX locks will work fine
115** to synchronize access for threads in separate processes, but not
116** threads within the same process.
117**
118** To work around the problem, SQLite has to manage file locks internally
119** on its own. Whenever a new database is opened, we have to find the
120** specific inode of the database file (the inode is determined by the
121** st_dev and st_ino fields of the stat structure that fstat() fills in)
122** and check for locks already existing on that inode. When locks are
123** created or removed, we have to look at our own internal record of the
124** locks to see if another thread has previously set a lock on that same
125** inode.
126**
127** The OsFile structure for POSIX is no longer just an integer file
128** descriptor. It is now a structure that holds the integer file
129** descriptor and a pointer to a structure that describes the internal
130** locks on the corresponding inode. There is one locking structure
131** per inode, so if the same inode is opened twice, both OsFile structures
132** point to the same locking structure. The locking structure keeps
133** a reference count (so we will know when to delete it) and a "cnt"
134** field that tells us its internal lock status. cnt==0 means the
135** file is unlocked. cnt==-1 means the file has an exclusive lock.
136** cnt>0 means there are cnt shared locks on the file.
137**
138** Any attempt to lock or unlock a file first checks the locking
139** structure. The fcntl() system call is only invoked to set a
140** POSIX lock if the internal lock structure transitions between
141** a locked and an unlocked state.
142**
143** 2004-Jan-11:
144** More recent discoveries about POSIX advisory locks. (The more
** I discover, the more I realize that POSIX advisory locks are
146** an abomination.)
147**
148** If you close a file descriptor that points to a file that has locks,
149** all locks on that file that are owned by the current process are
150** released. To work around this problem, each OsFile structure contains
151** a pointer to an openCnt structure. There is one openCnt structure
152** per open inode, which means that multiple OsFiles can point to a single
153** openCnt. When an attempt is made to close an OsFile, if there are
154** other OsFiles open on the same inode that are holding locks, the call
155** to close() the file descriptor is deferred until all of the locks clear.
156** The openCnt structure keeps a list of file descriptors that need to
157** be closed and that list is walked (and cleared) when the last lock
158** clears.
159**
160** First, under Linux threads, because each thread has a separate
161** process ID, lock operations in one thread do not override locks
162** to the same file in other threads. Linux threads behave like
163** separate processes in this respect. But, if you close a file
164** descriptor in linux threads, all locks are cleared, even locks
165** on other threads and even though the other threads have different
166** process IDs. Linux threads is inconsistent in this respect.
167** (I'm beginning to think that linux threads is an abomination too.)
168** The consequence of this all is that the hash table for the lockInfo
169** structure has to include the process id as part of its key because
170** locks in different threads are treated as distinct. But the
171** openCnt structure should not include the process id in its
172** key because close() clears lock on all threads, not just the current
173** thread. Were it not for this goofiness in linux threads, we could
174** combine the lockInfo and openCnt structures into a single structure.
drh5fdae772004-06-29 03:29:00 +0000175**
176** 2004-Jun-28:
177** On some versions of linux, threads can override each others locks.
178** On others not. Sometimes you can change the behavior on the same
179** system by setting the LD_ASSUME_KERNEL environment variable. The
180** POSIX standard is silent as to which behavior is correct, as far
181** as I can tell, so other versions of unix might show the same
** inconsistency. There is little doubt in my mind that posix
183** advisory locks and linux threads are profoundly broken.
184**
185** To work around the inconsistencies, we have to test at runtime
186** whether or not threads can override each others locks. This test
187** is run once, the first time any lock is attempted. A static
188** variable is set to record the results of this test for future
189** use.
drhbbd42a62004-05-22 17:41:58 +0000190*/
191
/*
** Lookup key for the lockInfo hash table.  A lockInfo structure is
** located from the device and inode numbers of the file.
**
** When threads cannot override each others locks, the tid field holds
** the thread ID so that each thread gets its own lockInfo.  When threads
** can override each others locks, tid is always zero.  The tid field
** does not exist at all when compiled without SQLITE_UNIX_THREADS.
*/
struct lockKey {
  dev_t dev;        /* Device number */
  ino_t ino;        /* Inode number */
#if defined(SQLITE_UNIX_THREADS)
  pthread_t tid;    /* Thread ID, or zero if threads can override each other */
#endif
};
208
209/*
210** An instance of the following structure is allocated for each open
211** inode on each thread with a different process ID. (Threads have
212** different process IDs on linux, but not on most other unixes.)
213**
214** A single inode can have multiple file descriptors, so each OsFile
215** structure contains a pointer to an instance of this object and this
216** object keeps a count of the number of OsFiles pointing to it.
217*/
218struct lockInfo {
219 struct lockKey key; /* The lookup key */
drh2ac3ee92004-06-07 16:27:46 +0000220 int cnt; /* Number of SHARED locks held */
danielk19779a1d0ab2004-06-01 14:09:28 +0000221 int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
drhbbd42a62004-05-22 17:41:58 +0000222 int nRef; /* Number of pointers to this structure */
223};
224
/*
** Lookup key for the openCnt hash table.  It identifies a file by
** device and inode number only; unlike lockKey it carries no thread ID.
*/
struct openKey {
  dev_t dev;    /* Device number */
  ino_t ino;    /* Inode number */
};
234
235/*
236** An instance of the following structure is allocated for each open
237** inode. This structure keeps track of the number of locks on that
238** inode. If a close is attempted against an inode that is holding
239** locks, the close is deferred until all locks clear by adding the
240** file descriptor to be closed to the pending list.
241*/
242struct openCnt {
243 struct openKey key; /* The lookup key */
244 int nRef; /* Number of pointers to this structure */
245 int nLock; /* Number of outstanding locks */
246 int nPending; /* Number of pending close() operations */
247 int *aPending; /* Malloced space holding fd's awaiting a close() */
248};
249
250/*
251** These hash table maps inodes and process IDs into lockInfo and openCnt
252** structures. Access to these hash tables must be protected by a mutex.
253*/
254static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
255static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
256
drh5fdae772004-06-29 03:29:00 +0000257
258#ifdef SQLITE_UNIX_THREADS
/*
** Cached answer to the question "can threads override each others
** locks?":
**
**    -1:  Not yet determined.
**     0:  No.  Threads cannot override each others locks.
**     1:  Yes.  Threads can override each others locks.
*/
static int threadsOverrideEachOthersLocks = -1;
268
/*
** Argument block handed to each test thread started by the
** testThreadLockingBehavior() routine.  The thread performs the
** described lock operation on fd and stores the outcome in result.
*/
struct threadTestData {
  int fd;               /* File to be locked */
  struct flock lock;    /* The locking operation */
  int result;           /* Result of the locking operation */
};
278
drh2b4b5962005-06-15 17:47:55 +0000279#ifdef SQLITE_LOCK_TRACE
280/*
281** Print out information about all locking operations.
282**
283** This routine is used for troubleshooting locks on multithreaded
284** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
285** command-line option on the compiler. This code is normally
286** turnned off.
287*/
288static int lockTrace(int fd, int op, struct flock *p){
289 char *zOpName, *zType;
290 int s;
291 int savedErrno;
292 if( op==F_GETLK ){
293 zOpName = "GETLK";
294 }else if( op==F_SETLK ){
295 zOpName = "SETLK";
296 }else{
297 s = fcntl(fd, op, p);
298 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
299 return s;
300 }
301 if( p->l_type==F_RDLCK ){
302 zType = "RDLCK";
303 }else if( p->l_type==F_WRLCK ){
304 zType = "WRLCK";
305 }else if( p->l_type==F_UNLCK ){
306 zType = "UNLCK";
307 }else{
308 assert( 0 );
309 }
310 assert( p->l_whence==SEEK_SET );
311 s = fcntl(fd, op, p);
312 savedErrno = errno;
313 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
314 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
315 (int)p->l_pid, s);
316 if( s && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
317 struct flock l2;
318 l2 = *p;
319 fcntl(fd, F_GETLK, &l2);
320 if( l2.l_type==F_RDLCK ){
321 zType = "RDLCK";
322 }else if( l2.l_type==F_WRLCK ){
323 zType = "WRLCK";
324 }else if( l2.l_type==F_UNLCK ){
325 zType = "UNLCK";
326 }else{
327 assert( 0 );
328 }
329 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
330 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
331 }
332 errno = savedErrno;
333 return s;
334}
335#define fcntl lockTrace
336#endif /* SQLITE_LOCK_TRACE */
337
drh5fdae772004-06-29 03:29:00 +0000338/*
339** The testThreadLockingBehavior() routine launches two separate
340** threads on this routine. This routine attempts to lock a file
341** descriptor then returns. The success or failure of that attempt
342** allows the testThreadLockingBehavior() procedure to determine
343** whether or not threads can override each others locks.
344*/
345static void *threadLockingTest(void *pArg){
346 struct threadTestData *pData = (struct threadTestData*)pArg;
347 pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
348 return pArg;
349}
350
351/*
352** This procedure attempts to determine whether or not threads
353** can override each others locks then sets the
354** threadsOverrideEachOthersLocks variable appropriately.
355*/
356static void testThreadLockingBehavior(fd_orig){
357 int fd;
358 struct threadTestData d[2];
359 pthread_t t[2];
360
361 fd = dup(fd_orig);
362 if( fd<0 ) return;
363 memset(d, 0, sizeof(d));
364 d[0].fd = fd;
365 d[0].lock.l_type = F_RDLCK;
366 d[0].lock.l_len = 1;
367 d[0].lock.l_start = 0;
368 d[0].lock.l_whence = SEEK_SET;
369 d[1] = d[0];
370 d[1].lock.l_type = F_WRLCK;
371 pthread_create(&t[0], 0, threadLockingTest, &d[0]);
372 pthread_create(&t[1], 0, threadLockingTest, &d[1]);
373 pthread_join(t[0], 0);
374 pthread_join(t[1], 0);
375 close(fd);
376 threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
377}
378#endif /* SQLITE_UNIX_THREADS */
379
drhbbd42a62004-05-22 17:41:58 +0000380/*
381** Release a lockInfo structure previously allocated by findLockInfo().
382*/
383static void releaseLockInfo(struct lockInfo *pLock){
384 pLock->nRef--;
385 if( pLock->nRef==0 ){
386 sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
387 sqliteFree(pLock);
388 }
389}
390
391/*
392** Release a openCnt structure previously allocated by findLockInfo().
393*/
394static void releaseOpenCnt(struct openCnt *pOpen){
395 pOpen->nRef--;
396 if( pOpen->nRef==0 ){
397 sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
398 sqliteFree(pOpen->aPending);
399 sqliteFree(pOpen);
400 }
401}
402
403/*
404** Given a file descriptor, locate lockInfo and openCnt structures that
405** describes that file descriptor. Create a new ones if necessary. The
406** return values might be unset if an error occurs.
407**
408** Return the number of errors.
409*/
drh38f82712004-06-18 17:10:16 +0000410static int findLockInfo(
drhbbd42a62004-05-22 17:41:58 +0000411 int fd, /* The file descriptor used in the key */
412 struct lockInfo **ppLock, /* Return the lockInfo structure here */
drh5fdae772004-06-29 03:29:00 +0000413 struct openCnt **ppOpen /* Return the openCnt structure here */
drhbbd42a62004-05-22 17:41:58 +0000414){
415 int rc;
416 struct lockKey key1;
417 struct openKey key2;
418 struct stat statbuf;
419 struct lockInfo *pLock;
420 struct openCnt *pOpen;
421 rc = fstat(fd, &statbuf);
422 if( rc!=0 ) return 1;
423 memset(&key1, 0, sizeof(key1));
424 key1.dev = statbuf.st_dev;
425 key1.ino = statbuf.st_ino;
drh5fdae772004-06-29 03:29:00 +0000426#ifdef SQLITE_UNIX_THREADS
427 if( threadsOverrideEachOthersLocks<0 ){
428 testThreadLockingBehavior(fd);
429 }
430 key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
431#endif
drhbbd42a62004-05-22 17:41:58 +0000432 memset(&key2, 0, sizeof(key2));
433 key2.dev = statbuf.st_dev;
434 key2.ino = statbuf.st_ino;
435 pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
436 if( pLock==0 ){
437 struct lockInfo *pOld;
438 pLock = sqliteMallocRaw( sizeof(*pLock) );
439 if( pLock==0 ) return 1;
440 pLock->key = key1;
441 pLock->nRef = 1;
442 pLock->cnt = 0;
danielk19779a1d0ab2004-06-01 14:09:28 +0000443 pLock->locktype = 0;
drhbbd42a62004-05-22 17:41:58 +0000444 pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
445 if( pOld!=0 ){
446 assert( pOld==pLock );
447 sqliteFree(pLock);
448 return 1;
449 }
450 }else{
451 pLock->nRef++;
452 }
453 *ppLock = pLock;
454 pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
455 if( pOpen==0 ){
456 struct openCnt *pOld;
457 pOpen = sqliteMallocRaw( sizeof(*pOpen) );
458 if( pOpen==0 ){
459 releaseLockInfo(pLock);
460 return 1;
461 }
462 pOpen->key = key2;
463 pOpen->nRef = 1;
464 pOpen->nLock = 0;
465 pOpen->nPending = 0;
466 pOpen->aPending = 0;
467 pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
468 if( pOld!=0 ){
469 assert( pOld==pOpen );
470 sqliteFree(pOpen);
471 releaseLockInfo(pLock);
472 return 1;
473 }
474 }else{
475 pOpen->nRef++;
476 }
477 *ppOpen = pOpen;
478 return 0;
479}
480
481/*
482** Delete the named file
483*/
484int sqlite3OsDelete(const char *zFilename){
485 unlink(zFilename);
486 return SQLITE_OK;
487}
488
/*
** Return 1 if the named file exists and 0 if it does not.  Existence
** is tested with access().
*/
int sqlite3OsFileExists(const char *zFilename){
  int rc = access(zFilename, F_OK);
  return rc==0;
}
495
496/*
497** Attempt to open a file for both reading and writing. If that
498** fails, try opening it read-only. If the file does not exist,
499** try to create it.
500**
501** On success, a handle for the open file is written to *id
502** and *pReadonly is set to 0 if the file was opened for reading and
503** writing or 1 if the file was opened read-only. The function returns
504** SQLITE_OK.
505**
506** On failure, the function returns SQLITE_CANTOPEN and leaves
507** *id and *pReadonly unchanged.
508*/
509int sqlite3OsOpenReadWrite(
510 const char *zFilename,
511 OsFile *id,
512 int *pReadonly
513){
514 int rc;
drhda71ce12004-06-21 18:14:45 +0000515 assert( !id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000516 id->dirfd = -1;
drh2b4b5962005-06-15 17:47:55 +0000517 SET_THREADID(id);
drh8e855772005-05-17 11:25:31 +0000518 id->h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY,
519 SQLITE_DEFAULT_FILE_PERMISSIONS);
drha6abd042004-06-09 17:37:22 +0000520 if( id->h<0 ){
drh6458e392004-07-20 01:14:13 +0000521#ifdef EISDIR
522 if( errno==EISDIR ){
523 return SQLITE_CANTOPEN;
524 }
525#endif
drha6abd042004-06-09 17:37:22 +0000526 id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
527 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000528 return SQLITE_CANTOPEN;
529 }
530 *pReadonly = 1;
531 }else{
532 *pReadonly = 0;
533 }
534 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000535 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000536 sqlite3OsLeaveMutex();
537 if( rc ){
drha6abd042004-06-09 17:37:22 +0000538 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000539 return SQLITE_NOMEM;
540 }
danielk197713adf8a2004-06-03 16:08:41 +0000541 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000542 id->isOpen = 1;
drha6abd042004-06-09 17:37:22 +0000543 TRACE3("OPEN %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000544 OpenCounter(+1);
545 return SQLITE_OK;
546}
547
548
549/*
550** Attempt to open a new file for exclusive access by this process.
551** The file will be opened for both reading and writing. To avoid
552** a potential security problem, we do not allow the file to have
553** previously existed. Nor do we allow the file to be a symbolic
554** link.
555**
556** If delFlag is true, then make arrangements to automatically delete
557** the file when it is closed.
558**
559** On success, write the file handle into *id and return SQLITE_OK.
560**
561** On failure, return SQLITE_CANTOPEN.
562*/
563int sqlite3OsOpenExclusive(const char *zFilename, OsFile *id, int delFlag){
564 int rc;
drhda71ce12004-06-21 18:14:45 +0000565 assert( !id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000566 if( access(zFilename, 0)==0 ){
567 return SQLITE_CANTOPEN;
568 }
drh2b4b5962005-06-15 17:47:55 +0000569 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000570 id->dirfd = -1;
drha6abd042004-06-09 17:37:22 +0000571 id->h = open(zFilename,
drhd6459672005-08-13 17:17:01 +0000572 O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY,
573 SQLITE_DEFAULT_FILE_PERMISSIONS);
drha6abd042004-06-09 17:37:22 +0000574 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000575 return SQLITE_CANTOPEN;
576 }
577 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000578 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000579 sqlite3OsLeaveMutex();
580 if( rc ){
drha6abd042004-06-09 17:37:22 +0000581 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000582 unlink(zFilename);
583 return SQLITE_NOMEM;
584 }
danielk197713adf8a2004-06-03 16:08:41 +0000585 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000586 id->isOpen = 1;
drhbbd42a62004-05-22 17:41:58 +0000587 if( delFlag ){
588 unlink(zFilename);
589 }
drha6abd042004-06-09 17:37:22 +0000590 TRACE3("OPEN-EX %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000591 OpenCounter(+1);
592 return SQLITE_OK;
593}
594
595/*
596** Attempt to open a new file for read-only access.
597**
598** On success, write the file handle into *id and return SQLITE_OK.
599**
600** On failure, return SQLITE_CANTOPEN.
601*/
602int sqlite3OsOpenReadOnly(const char *zFilename, OsFile *id){
603 int rc;
drhda71ce12004-06-21 18:14:45 +0000604 assert( !id->isOpen );
drh2b4b5962005-06-15 17:47:55 +0000605 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000606 id->dirfd = -1;
drha6abd042004-06-09 17:37:22 +0000607 id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
608 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000609 return SQLITE_CANTOPEN;
610 }
611 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000612 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000613 sqlite3OsLeaveMutex();
614 if( rc ){
drha6abd042004-06-09 17:37:22 +0000615 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000616 return SQLITE_NOMEM;
617 }
danielk197713adf8a2004-06-03 16:08:41 +0000618 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000619 id->isOpen = 1;
drha6abd042004-06-09 17:37:22 +0000620 TRACE3("OPEN-RO %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000621 OpenCounter(+1);
622 return SQLITE_OK;
623}
624
625/*
626** Attempt to open a file descriptor for the directory that contains a
627** file. This file descriptor can be used to fsync() the directory
628** in order to make sure the creation of a new file is actually written
629** to disk.
630**
631** This routine is only meaningful for Unix. It is a no-op under
632** windows since windows does not support hard links.
633**
634** On success, a handle for a previously open file is at *id is
635** updated with the new directory file descriptor and SQLITE_OK is
636** returned.
637**
638** On failure, the function returns SQLITE_CANTOPEN and leaves
639** *id unchanged.
640*/
641int sqlite3OsOpenDirectory(
642 const char *zDirname,
643 OsFile *id
644){
drhda71ce12004-06-21 18:14:45 +0000645 if( !id->isOpen ){
drhbbd42a62004-05-22 17:41:58 +0000646 /* Do not open the directory if the corresponding file is not already
647 ** open. */
648 return SQLITE_CANTOPEN;
649 }
drh2b4b5962005-06-15 17:47:55 +0000650 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000651 assert( id->dirfd<0 );
drh8e855772005-05-17 11:25:31 +0000652 id->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0);
drhbbd42a62004-05-22 17:41:58 +0000653 if( id->dirfd<0 ){
654 return SQLITE_CANTOPEN;
655 }
656 TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
657 return SQLITE_OK;
658}
659
/*
** When this global variable is non-NULL and names a directory, that
** directory is the first candidate considered for storing temporary
** files.
*/
char *sqlite3_temp_directory = 0;
drhab3f9fe2004-08-14 17:10:10 +0000666
667/*
drhbbd42a62004-05-22 17:41:58 +0000668** Create a temporary file name in zBuf. zBuf must be big enough to
669** hold at least SQLITE_TEMPNAME_SIZE characters.
670*/
671int sqlite3OsTempFileName(char *zBuf){
672 static const char *azDirs[] = {
drhab3f9fe2004-08-14 17:10:10 +0000673 0,
drhbbd42a62004-05-22 17:41:58 +0000674 "/var/tmp",
675 "/usr/tmp",
676 "/tmp",
677 ".",
678 };
drh57196282004-10-06 15:41:16 +0000679 static const unsigned char zChars[] =
drhbbd42a62004-05-22 17:41:58 +0000680 "abcdefghijklmnopqrstuvwxyz"
681 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
682 "0123456789";
683 int i, j;
684 struct stat buf;
685 const char *zDir = ".";
drheffd02b2004-08-29 23:42:13 +0000686 azDirs[0] = sqlite3_temp_directory;
drhbbd42a62004-05-22 17:41:58 +0000687 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
drhab3f9fe2004-08-14 17:10:10 +0000688 if( azDirs[i]==0 ) continue;
drhbbd42a62004-05-22 17:41:58 +0000689 if( stat(azDirs[i], &buf) ) continue;
690 if( !S_ISDIR(buf.st_mode) ) continue;
691 if( access(azDirs[i], 07) ) continue;
692 zDir = azDirs[i];
693 break;
694 }
695 do{
696 sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
697 j = strlen(zBuf);
698 sqlite3Randomness(15, &zBuf[j]);
699 for(i=0; i<15; i++, j++){
700 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
701 }
702 zBuf[j] = 0;
703 }while( access(zBuf,0)==0 );
704 return SQLITE_OK;
705}
706
drh268283b2005-01-08 15:44:25 +0000707#ifndef SQLITE_OMIT_PAGER_PRAGMAS
/*
** Return 1 if zBuf names an existing directory for which the process
** has read, write and search permission.  Return 0 otherwise, which
** includes the cases where zBuf is NULL or an empty string.
*/
int sqlite3OsIsDirWritable(char *zBuf){
  struct stat sStat;
  if( zBuf==0 || zBuf[0]==0 ){
    return 0;
  }
  if( stat(zBuf, &sStat)!=0 || !S_ISDIR(sStat.st_mode) ){
    return 0;
  }
  return access(zBuf, 07)==0;
}
drh268283b2005-01-08 15:44:25 +0000721#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
tpoindex9a09a3c2004-12-20 19:01:32 +0000722
723/*
drhbbd42a62004-05-22 17:41:58 +0000724** Read data from a file into a buffer. Return SQLITE_OK if all
725** bytes were read successfully and SQLITE_IOERR if anything goes
726** wrong.
727*/
728int sqlite3OsRead(OsFile *id, void *pBuf, int amt){
729 int got;
drhda71ce12004-06-21 18:14:45 +0000730 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000731 SimulateIOError(SQLITE_IOERR);
732 TIMER_START;
drha6abd042004-06-09 17:37:22 +0000733 got = read(id->h, pBuf, amt);
drhbbd42a62004-05-22 17:41:58 +0000734 TIMER_END;
drhe29b9152005-03-18 14:03:15 +0000735 TRACE5("READ %-3d %5d %7d %d\n", id->h, got, last_page, TIMER_ELAPSED);
drhbbd42a62004-05-22 17:41:58 +0000736 SEEK(0);
737 /* if( got<0 ) got = 0; */
738 if( got==amt ){
739 return SQLITE_OK;
740 }else{
741 return SQLITE_IOERR;
742 }
743}
744
745/*
746** Write data from a buffer into a file. Return SQLITE_OK on success
747** or some other error code on failure.
748*/
749int sqlite3OsWrite(OsFile *id, const void *pBuf, int amt){
750 int wrote = 0;
drhda71ce12004-06-21 18:14:45 +0000751 assert( id->isOpen );
drh4c7f9412005-02-03 00:29:47 +0000752 assert( amt>0 );
drhbbd42a62004-05-22 17:41:58 +0000753 SimulateIOError(SQLITE_IOERR);
drh047d4832004-10-01 14:38:02 +0000754 SimulateDiskfullError;
drhbbd42a62004-05-22 17:41:58 +0000755 TIMER_START;
drha6abd042004-06-09 17:37:22 +0000756 while( amt>0 && (wrote = write(id->h, pBuf, amt))>0 ){
drhbbd42a62004-05-22 17:41:58 +0000757 amt -= wrote;
758 pBuf = &((char*)pBuf)[wrote];
759 }
760 TIMER_END;
drhe29b9152005-03-18 14:03:15 +0000761 TRACE5("WRITE %-3d %5d %7d %d\n", id->h, wrote, last_page, TIMER_ELAPSED);
drhbbd42a62004-05-22 17:41:58 +0000762 SEEK(0);
763 if( amt>0 ){
764 return SQLITE_FULL;
765 }
766 return SQLITE_OK;
767}
768
769/*
770** Move the read/write pointer in a file.
771*/
drheb206252004-10-01 02:00:31 +0000772int sqlite3OsSeek(OsFile *id, i64 offset){
drhda71ce12004-06-21 18:14:45 +0000773 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000774 SEEK(offset/1024 + 1);
drhb4746b92005-09-09 01:32:06 +0000775#ifdef SQLITE_TEST
776 if( offset ) SimulateDiskfullError
777#endif
drha6abd042004-06-09 17:37:22 +0000778 lseek(id->h, offset, SEEK_SET);
drhbbd42a62004-05-22 17:41:58 +0000779 return SQLITE_OK;
780}
781
drhb851b2c2005-03-10 14:11:12 +0000782#ifdef SQLITE_TEST
/*
** Counters for the number of syncs and the number of full syncs that
** have been requested.  They are used to test that syncs and
** fullsyncs are occurring at the right times.
*/
int sqlite3_sync_count = 0;      /* Incremented on every sync */
int sqlite3_fullsync_count = 0;  /* Incremented on every full sync */
789#endif
790
/*
** The fdatasync() API is used only when the HAVE_FDATASYNC macro is
** defined.  Without it, every use of fdatasync() is redirected to
** fsync().
*/
#if !defined(HAVE_FDATASYNC)
# define fdatasync fsync
#endif
798
drhb851b2c2005-03-10 14:11:12 +0000799
/*
** The fsync() system call does not work as advertised on many
** unix systems.  The following procedure is an attempt to make
** it work better.
**
** Behavior depends on compile-time options:
**
**   SQLITE_NO_SYNC defined:  Nothing is synced and SQLITE_OK is
**       returned.  This is useful for running the test suite quickly
**       but should not be deployed, since an OS crash or power failure
**       will then likely corrupt the database file.
**
**   F_FULLFSYNC available:  When fullSync is true,
**       fcntl(fd, F_FULLFSYNC, 0) is tried first.  If that fails, or
**       if fullSync is false, fsync() is used.  dataOnly is ignored.
**
**   Otherwise:  fdatasync() is used when dataOnly is true and fsync()
**       when it is false.  fullSync is ignored.
**
** The return value is the result of the last system call made.
**
** In SQLITE_TEST builds, sqlite3_sync_count is incremented on every
** call and sqlite3_fullsync_count on every call with fullSync set, so
** that tests can verify this procedure is called with the correct
** arguments.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

#ifdef SQLITE_TEST
  sqlite3_sync_count++;
  if( fullSync ) sqlite3_fullsync_count++;
#endif

#if defined(SQLITE_NO_SYNC)
  rc = SQLITE_OK;
#elif defined(F_FULLFSYNC)
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
  /* If the FULLSYNC failed or was not requested, do a normal fsync() */
  if( rc ) rc = fsync(fd);
#else
  rc = dataOnly ? fdatasync(fd) : fsync(fd);
#endif

  return rc;
}
850
851/*
drhbbd42a62004-05-22 17:41:58 +0000852** Make sure all writes to a particular file are committed to disk.
853**
drheb796a72005-09-08 12:38:41 +0000854** If dataOnly==0 then both the file itself and its metadata (file
855** size, access time, etc) are synced. If dataOnly!=0 then only the
856** file data is synced.
857**
drhbbd42a62004-05-22 17:41:58 +0000858** Under Unix, also make sure that the directory entry for the file
859** has been created by fsync-ing the directory that contains the file.
860** If we do not do this and we encounter a power failure, the directory
861** entry for the journal might not exist after we reboot. The next
862** SQLite to access the file will not know that the journal exists (because
863** the directory entry for the journal was never created) and the transaction
864** will not roll back - possibly leading to database corruption.
865*/
drheb796a72005-09-08 12:38:41 +0000866int sqlite3OsSync(OsFile *id, int dataOnly){
drhda71ce12004-06-21 18:14:45 +0000867 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000868 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000869 TRACE2("SYNC %-3d\n", id->h);
drheb796a72005-09-08 12:38:41 +0000870 if( full_fsync(id->h, id->fullSync, dataOnly) ){
drhbbd42a62004-05-22 17:41:58 +0000871 return SQLITE_IOERR;
drhbbd42a62004-05-22 17:41:58 +0000872 }
drha2854222004-06-17 19:04:17 +0000873 if( id->dirfd>=0 ){
874 TRACE2("DIRSYNC %-3d\n", id->dirfd);
drheb796a72005-09-08 12:38:41 +0000875 full_fsync(id->dirfd, id->fullSync, 0);
drha2854222004-06-17 19:04:17 +0000876 close(id->dirfd); /* Only need to sync once, so close the directory */
877 id->dirfd = -1; /* when we are done. */
878 }
drha2854222004-06-17 19:04:17 +0000879 return SQLITE_OK;
drhbbd42a62004-05-22 17:41:58 +0000880}
881
882/*
danielk1977962398d2004-06-14 09:35:16 +0000883** Sync the directory zDirname. This is a no-op on operating systems other
884** than UNIX.
drhb851b2c2005-03-10 14:11:12 +0000885**
886** This is used to make sure the master journal file has truely been deleted
887** before making changes to individual journals on a multi-database commit.
drhf30cc942005-03-11 17:52:34 +0000888** The F_FULLFSYNC option is not needed here.
danielk1977962398d2004-06-14 09:35:16 +0000889*/
890int sqlite3OsSyncDirectory(const char *zDirname){
891 int fd;
892 int r;
danielk1977369f27e2004-06-15 11:40:04 +0000893 SimulateIOError(SQLITE_IOERR);
drh8e855772005-05-17 11:25:31 +0000894 fd = open(zDirname, O_RDONLY|O_BINARY, 0);
danielk1977369f27e2004-06-15 11:40:04 +0000895 TRACE3("DIRSYNC %-3d (%s)\n", fd, zDirname);
danielk1977962398d2004-06-14 09:35:16 +0000896 if( fd<0 ){
897 return SQLITE_CANTOPEN;
898 }
899 r = fsync(fd);
900 close(fd);
901 return ((r==0)?SQLITE_OK:SQLITE_IOERR);
902}
903
904/*
drhbbd42a62004-05-22 17:41:58 +0000905** Truncate an open file to a specified size
906*/
drheb206252004-10-01 02:00:31 +0000907int sqlite3OsTruncate(OsFile *id, i64 nByte){
drhda71ce12004-06-21 18:14:45 +0000908 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000909 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000910 return ftruncate(id->h, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
drhbbd42a62004-05-22 17:41:58 +0000911}
912
913/*
914** Determine the current size of a file in bytes
915*/
drheb206252004-10-01 02:00:31 +0000916int sqlite3OsFileSize(OsFile *id, i64 *pSize){
drhbbd42a62004-05-22 17:41:58 +0000917 struct stat buf;
drhda71ce12004-06-21 18:14:45 +0000918 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000919 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000920 if( fstat(id->h, &buf)!=0 ){
drhbbd42a62004-05-22 17:41:58 +0000921 return SQLITE_IOERR;
922 }
923 *pSize = buf.st_size;
924 return SQLITE_OK;
925}
926
danielk19779a1d0ab2004-06-01 14:09:28 +0000927/*
danielk197713adf8a2004-06-03 16:08:41 +0000928** This routine checks if there is a RESERVED lock held on the specified
929** file by this or any other process. If such a lock is held, return
drh2ac3ee92004-06-07 16:27:46 +0000930** non-zero. If the file is unlocked or holds only SHARED locks, then
931** return zero.
danielk197713adf8a2004-06-03 16:08:41 +0000932*/
drha6abd042004-06-09 17:37:22 +0000933int sqlite3OsCheckReservedLock(OsFile *id){
danielk197713adf8a2004-06-03 16:08:41 +0000934 int r = 0;
935
drhda71ce12004-06-21 18:14:45 +0000936 assert( id->isOpen );
drh2b4b5962005-06-15 17:47:55 +0000937 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh2ac3ee92004-06-07 16:27:46 +0000938 sqlite3OsEnterMutex(); /* Needed because id->pLock is shared across threads */
danielk197713adf8a2004-06-03 16:08:41 +0000939
940 /* Check if a thread in this process holds such a lock */
941 if( id->pLock->locktype>SHARED_LOCK ){
942 r = 1;
943 }
944
drh2ac3ee92004-06-07 16:27:46 +0000945 /* Otherwise see if some other process holds it.
danielk197713adf8a2004-06-03 16:08:41 +0000946 */
947 if( !r ){
948 struct flock lock;
949 lock.l_whence = SEEK_SET;
drh2ac3ee92004-06-07 16:27:46 +0000950 lock.l_start = RESERVED_BYTE;
951 lock.l_len = 1;
952 lock.l_type = F_WRLCK;
drha6abd042004-06-09 17:37:22 +0000953 fcntl(id->h, F_GETLK, &lock);
danielk197713adf8a2004-06-03 16:08:41 +0000954 if( lock.l_type!=F_UNLCK ){
955 r = 1;
956 }
957 }
958
959 sqlite3OsLeaveMutex();
drha6abd042004-06-09 17:37:22 +0000960 TRACE3("TEST WR-LOCK %d %d\n", id->h, r);
danielk197713adf8a2004-06-03 16:08:41 +0000961
962 return r;
963}
964
#ifdef SQLITE_DEBUG
/*
** Helper function for printing out trace information from debugging
** binaries.  This returns the string representation of the supplied
** integer lock-type, or "ERROR" if the value is not one of the five
** known lock types.
*/
static const char * locktypeName(int locktype){
  const char *zName = "ERROR";
  if( locktype==NO_LOCK ){
    zName = "NONE";
  }else if( locktype==SHARED_LOCK ){
    zName = "SHARED";
  }else if( locktype==RESERVED_LOCK ){
    zName = "RESERVED";
  }else if( locktype==PENDING_LOCK ){
    zName = "PENDING";
  }else if( locktype==EXCLUSIVE_LOCK ){
    zName = "EXCLUSIVE";
  }
  return zName;
}
#endif
982
danielk197713adf8a2004-06-03 16:08:41 +0000983/*
danielk19779a1d0ab2004-06-01 14:09:28 +0000984** Lock the file with the lock specified by parameter locktype - one
985** of the following:
986**
drh2ac3ee92004-06-07 16:27:46 +0000987** (1) SHARED_LOCK
988** (2) RESERVED_LOCK
989** (3) PENDING_LOCK
990** (4) EXCLUSIVE_LOCK
991**
drhb3e04342004-06-08 00:47:47 +0000992** Sometimes when requesting one lock state, additional lock states
993** are inserted in between. The locking might fail on one of the later
994** transitions leaving the lock state different from what it started but
995** still short of its goal. The following chart shows the allowed
996** transitions and the inserted intermediate states:
997**
998** UNLOCKED -> SHARED
999** SHARED -> RESERVED
1000** SHARED -> (PENDING) -> EXCLUSIVE
1001** RESERVED -> (PENDING) -> EXCLUSIVE
1002** PENDING -> EXCLUSIVE
drh2ac3ee92004-06-07 16:27:46 +00001003**
drha6abd042004-06-09 17:37:22 +00001004** This routine will only increase a lock. Use the sqlite3OsUnlock()
1005** routine to lower a locking level.
danielk19779a1d0ab2004-06-01 14:09:28 +00001006*/
1007int sqlite3OsLock(OsFile *id, int locktype){
danielk1977f42f25c2004-06-25 07:21:28 +00001008 /* The following describes the implementation of the various locks and
1009 ** lock transitions in terms of the POSIX advisory shared and exclusive
1010 ** lock primitives (called read-locks and write-locks below, to avoid
1011 ** confusion with SQLite lock names). The algorithms are complicated
1012 ** slightly in order to be compatible with windows systems simultaneously
1013 ** accessing the same database file, in case that is ever required.
1014 **
1015 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
1016 ** byte', each single bytes at well known offsets, and the 'shared byte
1017 ** range', a range of 510 bytes at a well known offset.
1018 **
1019 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
1020 ** byte'. If this is successful, a random byte from the 'shared byte
1021 ** range' is read-locked and the lock on the 'pending byte' released.
1022 **
danielk197790ba3bd2004-06-25 08:32:25 +00001023 ** A process may only obtain a RESERVED lock after it has a SHARED lock.
1024 ** A RESERVED lock is implemented by grabbing a write-lock on the
1025 ** 'reserved byte'.
danielk1977f42f25c2004-06-25 07:21:28 +00001026 **
1027 ** A process may only obtain a PENDING lock after it has obtained a
danielk197790ba3bd2004-06-25 08:32:25 +00001028 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
1029 ** on the 'pending byte'. This ensures that no new SHARED locks can be
1030 ** obtained, but existing SHARED locks are allowed to persist. A process
1031 ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
1032 ** This property is used by the algorithm for rolling back a journal file
1033 ** after a crash.
danielk1977f42f25c2004-06-25 07:21:28 +00001034 **
danielk197790ba3bd2004-06-25 08:32:25 +00001035 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
1036 ** implemented by obtaining a write-lock on the entire 'shared byte
1037 ** range'. Since all other locks require a read-lock on one of the bytes
1038 ** within this range, this ensures that no other locks are held on the
1039 ** database.
danielk1977f42f25c2004-06-25 07:21:28 +00001040 **
1041 ** The reason a single byte cannot be used instead of the 'shared byte
1042 ** range' is that some versions of windows do not support read-locks. By
1043 ** locking a random byte from a range, concurrent SHARED locks may exist
1044 ** even if the locking primitive used is always a write-lock.
1045 */
danielk19779a1d0ab2004-06-01 14:09:28 +00001046 int rc = SQLITE_OK;
1047 struct lockInfo *pLock = id->pLock;
1048 struct flock lock;
1049 int s;
1050
drhda71ce12004-06-21 18:14:45 +00001051 assert( id->isOpen );
drhe29b9152005-03-18 14:03:15 +00001052 TRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", id->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001053 locktypeName(id->locktype), locktypeName(pLock->locktype), pLock->cnt
1054 ,getpid() );
drh2b4b5962005-06-15 17:47:55 +00001055 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001056
1057 /* If there is already a lock of this type or more restrictive on the
1058 ** OsFile, do nothing. Don't use the end_lock: exit path, as
1059 ** sqlite3OsEnterMutex() hasn't been called yet.
1060 */
danielk197713adf8a2004-06-03 16:08:41 +00001061 if( id->locktype>=locktype ){
drhe29b9152005-03-18 14:03:15 +00001062 TRACE3("LOCK %d %s ok (already held)\n", id->h, locktypeName(locktype));
danielk19779a1d0ab2004-06-01 14:09:28 +00001063 return SQLITE_OK;
1064 }
1065
drhb3e04342004-06-08 00:47:47 +00001066 /* Make sure the locking sequence is correct
drh2ac3ee92004-06-07 16:27:46 +00001067 */
drhb3e04342004-06-08 00:47:47 +00001068 assert( id->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1069 assert( locktype!=PENDING_LOCK );
1070 assert( locktype!=RESERVED_LOCK || id->locktype==SHARED_LOCK );
drh2ac3ee92004-06-07 16:27:46 +00001071
drhb3e04342004-06-08 00:47:47 +00001072 /* This mutex is needed because id->pLock is shared across threads
1073 */
1074 sqlite3OsEnterMutex();
danielk19779a1d0ab2004-06-01 14:09:28 +00001075
1076 /* If some thread using this PID has a lock via a different OsFile*
1077 ** handle that precludes the requested lock, return BUSY.
1078 */
danielk197713adf8a2004-06-03 16:08:41 +00001079 if( (id->locktype!=pLock->locktype &&
drh2ac3ee92004-06-07 16:27:46 +00001080 (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
danielk19779a1d0ab2004-06-01 14:09:28 +00001081 ){
1082 rc = SQLITE_BUSY;
1083 goto end_lock;
1084 }
1085
1086 /* If a SHARED lock is requested, and some thread using this PID already
1087 ** has a SHARED or RESERVED lock, then increment reference counts and
1088 ** return SQLITE_OK.
1089 */
1090 if( locktype==SHARED_LOCK &&
1091 (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
1092 assert( locktype==SHARED_LOCK );
danielk197713adf8a2004-06-03 16:08:41 +00001093 assert( id->locktype==0 );
danielk1977ecb2a962004-06-02 06:30:16 +00001094 assert( pLock->cnt>0 );
danielk197713adf8a2004-06-03 16:08:41 +00001095 id->locktype = SHARED_LOCK;
danielk19779a1d0ab2004-06-01 14:09:28 +00001096 pLock->cnt++;
1097 id->pOpen->nLock++;
1098 goto end_lock;
1099 }
1100
danielk197713adf8a2004-06-03 16:08:41 +00001101 lock.l_len = 1L;
drh2b4b5962005-06-15 17:47:55 +00001102
danielk19779a1d0ab2004-06-01 14:09:28 +00001103 lock.l_whence = SEEK_SET;
1104
drh3cde3bb2004-06-12 02:17:14 +00001105 /* A PENDING lock is needed before acquiring a SHARED lock and before
1106 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1107 ** be released.
danielk19779a1d0ab2004-06-01 14:09:28 +00001108 */
drh3cde3bb2004-06-12 02:17:14 +00001109 if( locktype==SHARED_LOCK
1110 || (locktype==EXCLUSIVE_LOCK && id->locktype<PENDING_LOCK)
1111 ){
danielk1977489468c2004-06-28 08:25:47 +00001112 lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
drh2ac3ee92004-06-07 16:27:46 +00001113 lock.l_start = PENDING_BYTE;
drha6abd042004-06-09 17:37:22 +00001114 s = fcntl(id->h, F_SETLK, &lock);
danielk19779a1d0ab2004-06-01 14:09:28 +00001115 if( s ){
1116 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1117 goto end_lock;
1118 }
drh3cde3bb2004-06-12 02:17:14 +00001119 }
1120
1121
1122 /* If control gets to this point, then actually go ahead and make
1123 ** operating system calls for the specified lock.
1124 */
1125 if( locktype==SHARED_LOCK ){
1126 assert( pLock->cnt==0 );
1127 assert( pLock->locktype==0 );
danielk19779a1d0ab2004-06-01 14:09:28 +00001128
drh2ac3ee92004-06-07 16:27:46 +00001129 /* Now get the read-lock */
1130 lock.l_start = SHARED_FIRST;
1131 lock.l_len = SHARED_SIZE;
drha6abd042004-06-09 17:37:22 +00001132 s = fcntl(id->h, F_SETLK, &lock);
drh2ac3ee92004-06-07 16:27:46 +00001133
1134 /* Drop the temporary PENDING lock */
1135 lock.l_start = PENDING_BYTE;
1136 lock.l_len = 1L;
danielk19779a1d0ab2004-06-01 14:09:28 +00001137 lock.l_type = F_UNLCK;
drh2b4b5962005-06-15 17:47:55 +00001138 if( fcntl(id->h, F_SETLK, &lock)!=0 ){
1139 rc = SQLITE_IOERR; /* This should never happen */
1140 goto end_lock;
1141 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001142 if( s ){
drhbbd42a62004-05-22 17:41:58 +00001143 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1144 }else{
danielk197713adf8a2004-06-03 16:08:41 +00001145 id->locktype = SHARED_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001146 id->pOpen->nLock++;
danielk19779a1d0ab2004-06-01 14:09:28 +00001147 pLock->cnt = 1;
drhbbd42a62004-05-22 17:41:58 +00001148 }
drh3cde3bb2004-06-12 02:17:14 +00001149 }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
1150 /* We are trying for an exclusive lock but another thread in this
1151 ** same process is still holding a shared lock. */
1152 rc = SQLITE_BUSY;
drhbbd42a62004-05-22 17:41:58 +00001153 }else{
drh3cde3bb2004-06-12 02:17:14 +00001154 /* The request was for a RESERVED or EXCLUSIVE lock. It is
danielk19779a1d0ab2004-06-01 14:09:28 +00001155 ** assumed that there is a SHARED or greater lock on the file
1156 ** already.
1157 */
danielk197713adf8a2004-06-03 16:08:41 +00001158 assert( 0!=id->locktype );
danielk19779a1d0ab2004-06-01 14:09:28 +00001159 lock.l_type = F_WRLCK;
1160 switch( locktype ){
1161 case RESERVED_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001162 lock.l_start = RESERVED_BYTE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001163 break;
danielk19779a1d0ab2004-06-01 14:09:28 +00001164 case EXCLUSIVE_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001165 lock.l_start = SHARED_FIRST;
1166 lock.l_len = SHARED_SIZE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001167 break;
1168 default:
1169 assert(0);
1170 }
drha6abd042004-06-09 17:37:22 +00001171 s = fcntl(id->h, F_SETLK, &lock);
danielk19779a1d0ab2004-06-01 14:09:28 +00001172 if( s ){
1173 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1174 }
drhbbd42a62004-05-22 17:41:58 +00001175 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001176
danielk1977ecb2a962004-06-02 06:30:16 +00001177 if( rc==SQLITE_OK ){
danielk197713adf8a2004-06-03 16:08:41 +00001178 id->locktype = locktype;
danielk1977ecb2a962004-06-02 06:30:16 +00001179 pLock->locktype = locktype;
drh3cde3bb2004-06-12 02:17:14 +00001180 }else if( locktype==EXCLUSIVE_LOCK ){
1181 id->locktype = PENDING_LOCK;
1182 pLock->locktype = PENDING_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001183 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001184
1185end_lock:
drhbbd42a62004-05-22 17:41:58 +00001186 sqlite3OsLeaveMutex();
drhe29b9152005-03-18 14:03:15 +00001187 TRACE4("LOCK %d %s %s\n", id->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001188 rc==SQLITE_OK ? "ok" : "failed");
drhbbd42a62004-05-22 17:41:58 +00001189 return rc;
1190}
1191
1192/*
drha6abd042004-06-09 17:37:22 +00001193** Lower the locking level on file descriptor id to locktype. locktype
1194** must be either NO_LOCK or SHARED_LOCK.
1195**
1196** If the locking level of the file descriptor is already at or below
1197** the requested locking level, this routine is a no-op.
1198**
drh9c105bb2004-10-02 20:38:28 +00001199** It is not possible for this routine to fail if the second argument
1200** is NO_LOCK. If the second argument is SHARED_LOCK, this routine
1201** might return SQLITE_IOERR instead of SQLITE_OK.
drhbbd42a62004-05-22 17:41:58 +00001202*/
drha6abd042004-06-09 17:37:22 +00001203int sqlite3OsUnlock(OsFile *id, int locktype){
1204 struct lockInfo *pLock;
1205 struct flock lock;
drh9c105bb2004-10-02 20:38:28 +00001206 int rc = SQLITE_OK;
drha6abd042004-06-09 17:37:22 +00001207
drhda71ce12004-06-21 18:14:45 +00001208 assert( id->isOpen );
drhe29b9152005-03-18 14:03:15 +00001209 TRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", id->h, locktype, id->locktype,
danielk19772b444852004-06-29 07:45:33 +00001210 id->pLock->locktype, id->pLock->cnt, getpid());
drh2b4b5962005-06-15 17:47:55 +00001211 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drha6abd042004-06-09 17:37:22 +00001212
1213 assert( locktype<=SHARED_LOCK );
1214 if( id->locktype<=locktype ){
1215 return SQLITE_OK;
1216 }
drhbbd42a62004-05-22 17:41:58 +00001217 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +00001218 pLock = id->pLock;
1219 assert( pLock->cnt!=0 );
1220 if( id->locktype>SHARED_LOCK ){
1221 assert( pLock->locktype==id->locktype );
drh9c105bb2004-10-02 20:38:28 +00001222 if( locktype==SHARED_LOCK ){
1223 lock.l_type = F_RDLCK;
1224 lock.l_whence = SEEK_SET;
1225 lock.l_start = SHARED_FIRST;
1226 lock.l_len = SHARED_SIZE;
1227 if( fcntl(id->h, F_SETLK, &lock)!=0 ){
1228 /* This should never happen */
1229 rc = SQLITE_IOERR;
1230 }
1231 }
drhbbd42a62004-05-22 17:41:58 +00001232 lock.l_type = F_UNLCK;
1233 lock.l_whence = SEEK_SET;
drha6abd042004-06-09 17:37:22 +00001234 lock.l_start = PENDING_BYTE;
1235 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
drh2b4b5962005-06-15 17:47:55 +00001236 if( fcntl(id->h, F_SETLK, &lock)==0 ){
1237 pLock->locktype = SHARED_LOCK;
1238 }else{
1239 rc = SQLITE_IOERR; /* This should never happen */
1240 }
drhbbd42a62004-05-22 17:41:58 +00001241 }
drha6abd042004-06-09 17:37:22 +00001242 if( locktype==NO_LOCK ){
1243 struct openCnt *pOpen;
danielk1977ecb2a962004-06-02 06:30:16 +00001244
drha6abd042004-06-09 17:37:22 +00001245 /* Decrement the shared lock counter. Release the lock using an
1246 ** OS call only when all threads in this same process have released
1247 ** the lock.
1248 */
1249 pLock->cnt--;
1250 if( pLock->cnt==0 ){
1251 lock.l_type = F_UNLCK;
1252 lock.l_whence = SEEK_SET;
1253 lock.l_start = lock.l_len = 0L;
drh2b4b5962005-06-15 17:47:55 +00001254 if( fcntl(id->h, F_SETLK, &lock)==0 ){
1255 pLock->locktype = NO_LOCK;
1256 }else{
1257 rc = SQLITE_IOERR; /* This should never happen */
1258 }
drha6abd042004-06-09 17:37:22 +00001259 }
1260
drhbbd42a62004-05-22 17:41:58 +00001261 /* Decrement the count of locks against this same file. When the
1262 ** count reaches zero, close any other file descriptors whose close
1263 ** was deferred because of outstanding locks.
1264 */
drha6abd042004-06-09 17:37:22 +00001265 pOpen = id->pOpen;
drhbbd42a62004-05-22 17:41:58 +00001266 pOpen->nLock--;
1267 assert( pOpen->nLock>=0 );
1268 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1269 int i;
1270 for(i=0; i<pOpen->nPending; i++){
1271 close(pOpen->aPending[i]);
1272 }
1273 sqliteFree(pOpen->aPending);
1274 pOpen->nPending = 0;
1275 pOpen->aPending = 0;
1276 }
1277 }
1278 sqlite3OsLeaveMutex();
drha6abd042004-06-09 17:37:22 +00001279 id->locktype = locktype;
drh9c105bb2004-10-02 20:38:28 +00001280 return rc;
drhbbd42a62004-05-22 17:41:58 +00001281}
1282
1283/*
danielk1977e3026632004-06-22 11:29:02 +00001284** Close a file.
1285*/
1286int sqlite3OsClose(OsFile *id){
1287 if( !id->isOpen ) return SQLITE_OK;
drh2b4b5962005-06-15 17:47:55 +00001288 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk1977e3026632004-06-22 11:29:02 +00001289 sqlite3OsUnlock(id, NO_LOCK);
1290 if( id->dirfd>=0 ) close(id->dirfd);
1291 id->dirfd = -1;
1292 sqlite3OsEnterMutex();
1293 if( id->pOpen->nLock ){
1294 /* If there are outstanding locks, do not actually close the file just
1295 ** yet because that would clear those locks. Instead, add the file
1296 ** descriptor to pOpen->aPending. It will be automatically closed when
1297 ** the last lock is cleared.
1298 */
1299 int *aNew;
1300 struct openCnt *pOpen = id->pOpen;
drhad81e872005-08-21 21:45:01 +00001301 aNew = sqliteRealloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
danielk1977e3026632004-06-22 11:29:02 +00001302 if( aNew==0 ){
1303 /* If a malloc fails, just leak the file descriptor */
1304 }else{
1305 pOpen->aPending = aNew;
drhad81e872005-08-21 21:45:01 +00001306 pOpen->aPending[pOpen->nPending] = id->h;
1307 pOpen->nPending++;
danielk1977e3026632004-06-22 11:29:02 +00001308 }
1309 }else{
1310 /* There are no outstanding locks so we can close the file immediately */
1311 close(id->h);
1312 }
1313 releaseLockInfo(id->pLock);
1314 releaseOpenCnt(id->pOpen);
1315 sqlite3OsLeaveMutex();
1316 id->isOpen = 0;
1317 TRACE2("CLOSE %-3d\n", id->h);
1318 OpenCounter(-1);
1319 return SQLITE_OK;
1320}
1321
/*
** Turn a relative pathname into a full pathname.  Return a pointer
** to the full pathname stored in space obtained from sqliteMalloc().
** The calling function is responsible for freeing this space once it
** is no longer needed.
**
** A name that already begins with '/' is simply copied.  Any other
** name is appended to the current working directory.
**
** Zero is returned if the scratch buffer for the working directory
** cannot be allocated or if getcwd() fails (for example because the
** working directory name does not fit in the buffer).
*/
char *sqlite3OsFullPathname(const char *zRelative){
  char *zFull = 0;
  if( zRelative[0]=='/' ){
    sqlite3SetString(&zFull, zRelative, (char*)0);
  }else{
    const int nCwdBuf = 5000;  /* Space set aside for the directory name */
    char *zBuf = sqliteMalloc(nCwdBuf);
    if( zBuf==0 ){
      return 0;
    }
    zBuf[0] = 0;
    /* getcwd() returns NULL on failure.  That NULL must not be handed to
    ** sqlite3SetString(): its argument list is terminated by a (char*)0,
    ** so a leading NULL would silently drop "/" and zRelative.  Leave
    ** zFull at 0 instead so that the caller sees the failure.
    */
    if( getcwd(zBuf, nCwdBuf)!=0 ){
      sqlite3SetString(&zFull, zBuf, "/", zRelative, (char*)0);
    }
    sqliteFree(zBuf);
  }
  return zFull;
}
1344
1345
1346#endif /* SQLITE_OMIT_DISKIO */
/***************************************************************************
** Everything above deals with file I/O.  Everything that follows deals
** with other miscellaneous aspects of the operating system interface
****************************************************************************/
1351
1352
1353/*
drhbbd42a62004-05-22 17:41:58 +00001354** Get information to seed the random number generator. The seed
1355** is written into the buffer zBuf[256]. The calling function must
1356** supply a sufficiently large buffer.
1357*/
1358int sqlite3OsRandomSeed(char *zBuf){
1359 /* We have to initialize zBuf to prevent valgrind from reporting
1360 ** errors. The reports issued by valgrind are incorrect - we would
1361 ** prefer that the randomness be increased by making use of the
1362 ** uninitialized space in zBuf - but valgrind errors tend to worry
1363 ** some users. Rather than argue, it seems easier just to initialize
1364 ** the whole array and silence valgrind, even if that means less randomness
1365 ** in the random seed.
1366 **
1367 ** When testing, initializing zBuf[] to zero is all we do. That means
1368 ** that we always use the same random number sequence.* This makes the
1369 ** tests repeatable.
1370 */
1371 memset(zBuf, 0, 256);
1372#if !defined(SQLITE_TEST)
1373 {
drh842b8642005-01-21 17:53:17 +00001374 int pid, fd;
1375 fd = open("/dev/urandom", O_RDONLY);
1376 if( fd<0 ){
1377 time((time_t*)zBuf);
1378 pid = getpid();
1379 memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
1380 }else{
1381 read(fd, zBuf, 256);
1382 close(fd);
1383 }
drhbbd42a62004-05-22 17:41:58 +00001384 }
1385#endif
1386 return SQLITE_OK;
1387}
1388
/*
** Sleep for a little while.  Return the amount of time slept, in
** milliseconds.
**
** A request of zero or fewer milliseconds returns 0 at once without
** sleeping.  When usleep() is available (HAVE_USLEEP) the return value
** is ms.  Otherwise the request is rounded up to a whole number of
** seconds and that rounded figure, in milliseconds, is returned.
*/
int sqlite3OsSleep(int ms){
  /* A negative count would turn into an enormous unsigned interval once
  ** it is handed to sleep() or usleep().
  */
  if( ms<=0 ) return 0;
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  /* usleep() is permitted to reject intervals of a million microseconds
  ** or more, so whole seconds are handled by sleep() and only the
  ** sub-second remainder is given to usleep().
  */
  if( ms>=1000 ) sleep(ms/1000);
  if( ms%1000 ) usleep((ms%1000)*1000);
  return ms;
#else
  {
    int nSec = (ms+999)/1000;   /* Round up to a whole number of seconds */
    sleep(nSec);
    return 1000*nSec;
  }
#endif
}
1401
/*
** Static variables used for thread synchronization.
**
** inMutex is 1 between a call to sqlite3OsEnterMutex() and the matching
** call to sqlite3OsLeaveMutex() and 0 the rest of the time.  The pthread
** mutex only exists in builds with SQLITE_UNIX_THREADS defined.
*/
static int inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

/*
** The following pair of routines implement mutual exclusion for
** multi-threaded processes.  Only a single thread is allowed to
** execute code that is surrounded by EnterMutex() and LeaveMutex().
**
** SQLite uses only a single Mutex.  There is not much critical
** code and what little there is executes quickly and without blocking.
**
** The asserts check that calls are properly paired: the mutex is never
** entered while it is already held and never left while it is not held.
*/
void sqlite3OsEnterMutex(){
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_lock(&mutex);
#endif
  assert( inMutex==0 );
  inMutex = 1;
}
void sqlite3OsLeaveMutex(){
  assert( inMutex!=0 );
  inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_unlock(&mutex);
#endif
}
1432
/*
** The following variable, if set to a non-zero value, becomes the result
** returned from sqlite3OsCurrentTime().  This is used for testing.
** The value is a count of seconds since the start of 1970.
*/
#ifdef SQLITE_TEST
int sqlite3_current_time = 0;
#endif

/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow and
** return 0.  Return 1 if the time and date cannot be found.
**
** This implementation does not check the result of the underlying
** system call and so always returns 0.
**
** When NO_GETTOD is defined the time comes from time() and has a
** resolution of one second.  Otherwise it comes from gettimeofday().
** 2440587.5 is the Julian Day number of 1970-01-01 00:00:00 UTC.
*/
int sqlite3OsCurrentTime(double *prNow){
#ifdef NO_GETTOD
  time_t t;
  time(&t);
  *prNow = t/86400.0 + 2440587.5;
#else
  struct timeval sNow;
  /* The timezone argument is obsolete and the effect of passing a
  ** non-null pointer is unspecified, so none is supplied.
  */
  gettimeofday(&sNow, 0);
  *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
#endif
#ifdef SQLITE_TEST
  /* Let the test harness substitute a fixed time */
  if( sqlite3_current_time ){
    *prNow = sqlite3_current_time/86400.0 + 2440587.5;
  }
#endif
  return 0;
}
1464
drhbbd42a62004-05-22 17:41:58 +00001465#endif /* OS_UNIX */