blob: 959e2a6816c993a67f697f585d2ea2f73ac0a073 [file] [log] [blame]
drhbbd42a62004-05-22 17:41:58 +00001/*
2** 2004 May 22
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This file contains code that is specific to Unix systems.
14*/
drhbbd42a62004-05-22 17:41:58 +000015#include "sqliteInt.h"
drheb206252004-10-01 02:00:31 +000016#include "os.h"
17#if OS_UNIX /* This file is used on unix only */
drhbbd42a62004-05-22 17:41:58 +000018
19
20#include <time.h>
drh19e2d372005-08-29 23:00:03 +000021#include <sys/time.h>
drhbbd42a62004-05-22 17:41:58 +000022#include <errno.h>
23#include <unistd.h>
drh0ccebe72005-06-07 22:22:50 +000024
25/*
26** Do not include any of the File I/O interface procedures if the
** SQLITE_OMIT_DISKIO macro is defined (indicating that the database
28** will be in-memory only)
29*/
30#ifndef SQLITE_OMIT_DISKIO
31
32
33/*
34** Define various macros that are missing from some systems.
35*/
/* Large-file flag: forced to zero when the platform does not provide it
** or when large-file support is disabled with SQLITE_DISABLE_LFS. */
#if defined(SQLITE_DISABLE_LFS) || !defined(O_LARGEFILE)
# undef  O_LARGEFILE
# define O_LARGEFILE 0
#endif
#if !defined(O_NOFOLLOW)
# define O_NOFOLLOW 0
#endif
#if !defined(O_BINARY)
# define O_BINARY 0
#endif

/*
** The DJGPP compiler environment looks mostly like Unix, but it
** lacks the fcntl() system call.  So redefine fcntl() to be something
** that always succeeds.  This means that locking does not occur under
** DJGPP.  But it's DOS - what did you expect?
*/
#if defined(__DJGPP__)
# define fcntl(A,B,C) 0
#endif
59
60/*
drhbbd42a62004-05-22 17:41:58 +000061** Include code that is common to all os_*.c files
62*/
63#include "os_common.h"
64
drh2b4b5962005-06-15 17:47:55 +000065/*
66** The threadid macro resolves to the thread-id or to 0. Used for
67** testing and debugging only.
68*/
#ifdef SQLITE_UNIX_THREADS
#define threadid pthread_self()
#else
#define threadid 0
#endif

/*
** Set or check the OsFile.tid field.  This field is set when an OsFile
** is first opened.  All subsequent uses of the OsFile verify that the
** same thread is operating on the OsFile.  Some operating systems do
** not allow locks to be overridden by other threads and that restriction
** means that sqlite3* database handles cannot be moved from one thread
** to another.  This logic makes sure a user does not try to do that
** by mistake.
**
** SET_THREADID(X) records the calling thread in X->tid.
** CHECK_THREADID(X) is true if the calling thread is NOT the thread
** recorded in X->tid.  Without SQLITE_UNIX_THREADS the first expands to
** nothing and the second is always 0.
**
** The macro argument is parenthesized so that any pointer-valued
** expression (not just a plain identifier) can be passed safely.
*/
#ifdef SQLITE_UNIX_THREADS
# define SET_THREADID(X)   ((X)->tid = pthread_self())
# define CHECK_THREADID(X) (!pthread_equal((X)->tid, pthread_self()))
#else
# define SET_THREADID(X)
# define CHECK_THREADID(X) 0
#endif
91
drhbbd42a62004-05-22 17:41:58 +000092/*
93** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
94** section 6.5.2.2 lines 483 through 490 specify that when a process
95** sets or clears a lock, that operation overrides any prior locks set
96** by the same process. It does not explicitly say so, but this implies
97** that it overrides locks set by the same process using a different
98** file descriptor. Consider this test case:
99**
100** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
101** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
102**
103** Suppose ./file1 and ./file2 are really the same file (because
104** one is a hard or symbolic link to the other) then if you set
105** an exclusive lock on fd1, then try to get an exclusive lock
106** on fd2, it works. I would have expected the second lock to
107** fail since there was already a lock on the file due to fd1.
108** But not so. Since both locks came from the same process, the
109** second overrides the first, even though they were on different
110** file descriptors opened on different file names.
111**
112** Bummer. If you ask me, this is broken. Badly broken. It means
113** that we cannot use POSIX locks to synchronize file access among
114** competing threads of the same process. POSIX locks will work fine
115** to synchronize access for threads in separate processes, but not
116** threads within the same process.
117**
118** To work around the problem, SQLite has to manage file locks internally
119** on its own. Whenever a new database is opened, we have to find the
120** specific inode of the database file (the inode is determined by the
121** st_dev and st_ino fields of the stat structure that fstat() fills in)
122** and check for locks already existing on that inode. When locks are
123** created or removed, we have to look at our own internal record of the
124** locks to see if another thread has previously set a lock on that same
125** inode.
126**
127** The OsFile structure for POSIX is no longer just an integer file
128** descriptor. It is now a structure that holds the integer file
129** descriptor and a pointer to a structure that describes the internal
130** locks on the corresponding inode. There is one locking structure
131** per inode, so if the same inode is opened twice, both OsFile structures
132** point to the same locking structure. The locking structure keeps
133** a reference count (so we will know when to delete it) and a "cnt"
134** field that tells us its internal lock status. cnt==0 means the
135** file is unlocked. cnt==-1 means the file has an exclusive lock.
136** cnt>0 means there are cnt shared locks on the file.
137**
138** Any attempt to lock or unlock a file first checks the locking
139** structure. The fcntl() system call is only invoked to set a
140** POSIX lock if the internal lock structure transitions between
141** a locked and an unlocked state.
142**
143** 2004-Jan-11:
144** More recent discoveries about POSIX advisory locks. (The more
** I discover, the more I realize that POSIX advisory locks are
146** an abomination.)
147**
148** If you close a file descriptor that points to a file that has locks,
149** all locks on that file that are owned by the current process are
150** released. To work around this problem, each OsFile structure contains
151** a pointer to an openCnt structure. There is one openCnt structure
152** per open inode, which means that multiple OsFiles can point to a single
153** openCnt. When an attempt is made to close an OsFile, if there are
154** other OsFiles open on the same inode that are holding locks, the call
155** to close() the file descriptor is deferred until all of the locks clear.
156** The openCnt structure keeps a list of file descriptors that need to
157** be closed and that list is walked (and cleared) when the last lock
158** clears.
159**
160** First, under Linux threads, because each thread has a separate
161** process ID, lock operations in one thread do not override locks
162** to the same file in other threads. Linux threads behave like
163** separate processes in this respect. But, if you close a file
164** descriptor in linux threads, all locks are cleared, even locks
165** on other threads and even though the other threads have different
166** process IDs. Linux threads is inconsistent in this respect.
167** (I'm beginning to think that linux threads is an abomination too.)
168** The consequence of this all is that the hash table for the lockInfo
169** structure has to include the process id as part of its key because
170** locks in different threads are treated as distinct. But the
171** openCnt structure should not include the process id in its
172** key because close() clears lock on all threads, not just the current
173** thread. Were it not for this goofiness in linux threads, we could
174** combine the lockInfo and openCnt structures into a single structure.
drh5fdae772004-06-29 03:29:00 +0000175**
176** 2004-Jun-28:
177** On some versions of linux, threads can override each others locks.
178** On others not. Sometimes you can change the behavior on the same
179** system by setting the LD_ASSUME_KERNEL environment variable. The
180** POSIX standard is silent as to which behavior is correct, as far
181** as I can tell, so other versions of unix might show the same
** inconsistency.  There is little doubt in my mind that posix
183** advisory locks and linux threads are profoundly broken.
184**
185** To work around the inconsistencies, we have to test at runtime
186** whether or not threads can override each others locks. This test
187** is run once, the first time any lock is attempted. A static
188** variable is set to record the results of this test for future
189** use.
drhbbd42a62004-05-22 17:41:58 +0000190*/
191
/*
** Hash key used to look up the lockInfo structure that belongs to a
** particular inode.
**
** When threads cannot override each others locks, the tid field holds
** the ID of the owning thread so that every thread gets its own
** lockInfo.  When threads can override each others locks, tid is
** always zero.  The tid field is only present when the library is
** compiled with SQLITE_UNIX_THREADS.
*/
struct lockKey {
  dev_t dev;        /* Device number */
  ino_t ino;        /* Inode number */
#ifdef SQLITE_UNIX_THREADS
  pthread_t tid;    /* Thread ID, or zero if threads can override each other */
#endif
};
208
209/*
210** An instance of the following structure is allocated for each open
211** inode on each thread with a different process ID. (Threads have
212** different process IDs on linux, but not on most other unixes.)
213**
214** A single inode can have multiple file descriptors, so each OsFile
215** structure contains a pointer to an instance of this object and this
216** object keeps a count of the number of OsFiles pointing to it.
217*/
218struct lockInfo {
219 struct lockKey key; /* The lookup key */
drh2ac3ee92004-06-07 16:27:46 +0000220 int cnt; /* Number of SHARED locks held */
danielk19779a1d0ab2004-06-01 14:09:28 +0000221 int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
drhbbd42a62004-05-22 17:41:58 +0000222 int nRef; /* Number of pointers to this structure */
223};
224
/*
** Hash key used to look up the openCnt structure that belongs to a
** particular inode.  It carries the same device/inode pair as lockKey
** but never includes a thread ID.
*/
struct openKey {
  dev_t dev;   /* Device number */
  ino_t ino;   /* Inode number */
};
234
235/*
236** An instance of the following structure is allocated for each open
237** inode. This structure keeps track of the number of locks on that
238** inode. If a close is attempted against an inode that is holding
239** locks, the close is deferred until all locks clear by adding the
240** file descriptor to be closed to the pending list.
241*/
242struct openCnt {
243 struct openKey key; /* The lookup key */
244 int nRef; /* Number of pointers to this structure */
245 int nLock; /* Number of outstanding locks */
246 int nPending; /* Number of pending close() operations */
247 int *aPending; /* Malloced space holding fd's awaiting a close() */
248};
249
250/*
251** These hash table maps inodes and process IDs into lockInfo and openCnt
252** structures. Access to these hash tables must be protected by a mutex.
253*/
254static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
255static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
256
drh5fdae772004-06-29 03:29:00 +0000257
258#ifdef SQLITE_UNIX_THREADS
/*
** Cached answer to the question "can threads of this process override
** each others locks?":
**
**    -1:  Not yet determined.
**     0:  No.  Threads cannot override each others locks.
**     1:  Yes.  Threads can override each others locks.
*/
static int threadsOverrideEachOthersLocks = -1;
268
/*
** Argument block handed by testThreadLockingBehavior() to each of the
** test threads it starts.  The thread applies "lock" to "fd" and
** stores the outcome of that fcntl() call in "result".
*/
struct threadTestData {
  int fd;              /* File to be locked */
  struct flock lock;   /* The locking operation */
  int result;          /* Result of the locking operation */
};
278
drh2b4b5962005-06-15 17:47:55 +0000279#ifdef SQLITE_LOCK_TRACE
280/*
281** Print out information about all locking operations.
282**
283** This routine is used for troubleshooting locks on multithreaded
284** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
285** command-line option on the compiler. This code is normally
286** turnned off.
287*/
288static int lockTrace(int fd, int op, struct flock *p){
289 char *zOpName, *zType;
290 int s;
291 int savedErrno;
292 if( op==F_GETLK ){
293 zOpName = "GETLK";
294 }else if( op==F_SETLK ){
295 zOpName = "SETLK";
296 }else{
297 s = fcntl(fd, op, p);
298 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
299 return s;
300 }
301 if( p->l_type==F_RDLCK ){
302 zType = "RDLCK";
303 }else if( p->l_type==F_WRLCK ){
304 zType = "WRLCK";
305 }else if( p->l_type==F_UNLCK ){
306 zType = "UNLCK";
307 }else{
308 assert( 0 );
309 }
310 assert( p->l_whence==SEEK_SET );
311 s = fcntl(fd, op, p);
312 savedErrno = errno;
313 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
314 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
315 (int)p->l_pid, s);
316 if( s && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
317 struct flock l2;
318 l2 = *p;
319 fcntl(fd, F_GETLK, &l2);
320 if( l2.l_type==F_RDLCK ){
321 zType = "RDLCK";
322 }else if( l2.l_type==F_WRLCK ){
323 zType = "WRLCK";
324 }else if( l2.l_type==F_UNLCK ){
325 zType = "UNLCK";
326 }else{
327 assert( 0 );
328 }
329 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
330 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
331 }
332 errno = savedErrno;
333 return s;
334}
335#define fcntl lockTrace
336#endif /* SQLITE_LOCK_TRACE */
337
drh5fdae772004-06-29 03:29:00 +0000338/*
339** The testThreadLockingBehavior() routine launches two separate
340** threads on this routine. This routine attempts to lock a file
341** descriptor then returns. The success or failure of that attempt
342** allows the testThreadLockingBehavior() procedure to determine
343** whether or not threads can override each others locks.
344*/
345static void *threadLockingTest(void *pArg){
346 struct threadTestData *pData = (struct threadTestData*)pArg;
347 pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
348 return pArg;
349}
350
351/*
352** This procedure attempts to determine whether or not threads
353** can override each others locks then sets the
354** threadsOverrideEachOthersLocks variable appropriately.
355*/
356static void testThreadLockingBehavior(fd_orig){
357 int fd;
358 struct threadTestData d[2];
359 pthread_t t[2];
360
361 fd = dup(fd_orig);
362 if( fd<0 ) return;
363 memset(d, 0, sizeof(d));
364 d[0].fd = fd;
365 d[0].lock.l_type = F_RDLCK;
366 d[0].lock.l_len = 1;
367 d[0].lock.l_start = 0;
368 d[0].lock.l_whence = SEEK_SET;
369 d[1] = d[0];
370 d[1].lock.l_type = F_WRLCK;
371 pthread_create(&t[0], 0, threadLockingTest, &d[0]);
372 pthread_create(&t[1], 0, threadLockingTest, &d[1]);
373 pthread_join(t[0], 0);
374 pthread_join(t[1], 0);
375 close(fd);
376 threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
377}
378#endif /* SQLITE_UNIX_THREADS */
379
drhbbd42a62004-05-22 17:41:58 +0000380/*
381** Release a lockInfo structure previously allocated by findLockInfo().
382*/
383static void releaseLockInfo(struct lockInfo *pLock){
384 pLock->nRef--;
385 if( pLock->nRef==0 ){
386 sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
387 sqliteFree(pLock);
388 }
389}
390
391/*
392** Release a openCnt structure previously allocated by findLockInfo().
393*/
394static void releaseOpenCnt(struct openCnt *pOpen){
395 pOpen->nRef--;
396 if( pOpen->nRef==0 ){
397 sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
398 sqliteFree(pOpen->aPending);
399 sqliteFree(pOpen);
400 }
401}
402
403/*
404** Given a file descriptor, locate lockInfo and openCnt structures that
405** describes that file descriptor. Create a new ones if necessary. The
406** return values might be unset if an error occurs.
407**
408** Return the number of errors.
409*/
drh38f82712004-06-18 17:10:16 +0000410static int findLockInfo(
drhbbd42a62004-05-22 17:41:58 +0000411 int fd, /* The file descriptor used in the key */
412 struct lockInfo **ppLock, /* Return the lockInfo structure here */
drh5fdae772004-06-29 03:29:00 +0000413 struct openCnt **ppOpen /* Return the openCnt structure here */
drhbbd42a62004-05-22 17:41:58 +0000414){
415 int rc;
416 struct lockKey key1;
417 struct openKey key2;
418 struct stat statbuf;
419 struct lockInfo *pLock;
420 struct openCnt *pOpen;
421 rc = fstat(fd, &statbuf);
422 if( rc!=0 ) return 1;
423 memset(&key1, 0, sizeof(key1));
424 key1.dev = statbuf.st_dev;
425 key1.ino = statbuf.st_ino;
drh5fdae772004-06-29 03:29:00 +0000426#ifdef SQLITE_UNIX_THREADS
427 if( threadsOverrideEachOthersLocks<0 ){
428 testThreadLockingBehavior(fd);
429 }
430 key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
431#endif
drhbbd42a62004-05-22 17:41:58 +0000432 memset(&key2, 0, sizeof(key2));
433 key2.dev = statbuf.st_dev;
434 key2.ino = statbuf.st_ino;
435 pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
436 if( pLock==0 ){
437 struct lockInfo *pOld;
438 pLock = sqliteMallocRaw( sizeof(*pLock) );
439 if( pLock==0 ) return 1;
440 pLock->key = key1;
441 pLock->nRef = 1;
442 pLock->cnt = 0;
danielk19779a1d0ab2004-06-01 14:09:28 +0000443 pLock->locktype = 0;
drhbbd42a62004-05-22 17:41:58 +0000444 pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
445 if( pOld!=0 ){
446 assert( pOld==pLock );
447 sqliteFree(pLock);
448 return 1;
449 }
450 }else{
451 pLock->nRef++;
452 }
453 *ppLock = pLock;
454 pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
455 if( pOpen==0 ){
456 struct openCnt *pOld;
457 pOpen = sqliteMallocRaw( sizeof(*pOpen) );
458 if( pOpen==0 ){
459 releaseLockInfo(pLock);
460 return 1;
461 }
462 pOpen->key = key2;
463 pOpen->nRef = 1;
464 pOpen->nLock = 0;
465 pOpen->nPending = 0;
466 pOpen->aPending = 0;
467 pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
468 if( pOld!=0 ){
469 assert( pOld==pOpen );
470 sqliteFree(pOpen);
471 releaseLockInfo(pLock);
472 return 1;
473 }
474 }else{
475 pOpen->nRef++;
476 }
477 *ppOpen = pOpen;
478 return 0;
479}
480
481/*
482** Delete the named file
483*/
484int sqlite3OsDelete(const char *zFilename){
485 unlink(zFilename);
486 return SQLITE_OK;
487}
488
/*
** Return TRUE (non-zero) if the named file exists, as reported by
** access() with a mode of 0, and FALSE (zero) otherwise.
*/
int sqlite3OsFileExists(const char *zFilename){
  int rc = access(zFilename, 0);
  return rc==0;
}
495
496/*
497** Attempt to open a file for both reading and writing. If that
498** fails, try opening it read-only. If the file does not exist,
499** try to create it.
500**
501** On success, a handle for the open file is written to *id
502** and *pReadonly is set to 0 if the file was opened for reading and
503** writing or 1 if the file was opened read-only. The function returns
504** SQLITE_OK.
505**
506** On failure, the function returns SQLITE_CANTOPEN and leaves
507** *id and *pReadonly unchanged.
508*/
509int sqlite3OsOpenReadWrite(
510 const char *zFilename,
511 OsFile *id,
512 int *pReadonly
513){
514 int rc;
drhda71ce12004-06-21 18:14:45 +0000515 assert( !id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000516 id->dirfd = -1;
drh2b4b5962005-06-15 17:47:55 +0000517 SET_THREADID(id);
drh8e855772005-05-17 11:25:31 +0000518 id->h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY,
519 SQLITE_DEFAULT_FILE_PERMISSIONS);
drha6abd042004-06-09 17:37:22 +0000520 if( id->h<0 ){
drh6458e392004-07-20 01:14:13 +0000521#ifdef EISDIR
522 if( errno==EISDIR ){
523 return SQLITE_CANTOPEN;
524 }
525#endif
drha6abd042004-06-09 17:37:22 +0000526 id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
527 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000528 return SQLITE_CANTOPEN;
529 }
530 *pReadonly = 1;
531 }else{
532 *pReadonly = 0;
533 }
534 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000535 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000536 sqlite3OsLeaveMutex();
537 if( rc ){
drha6abd042004-06-09 17:37:22 +0000538 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000539 return SQLITE_NOMEM;
540 }
danielk197713adf8a2004-06-03 16:08:41 +0000541 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000542 id->isOpen = 1;
drha6abd042004-06-09 17:37:22 +0000543 TRACE3("OPEN %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000544 OpenCounter(+1);
545 return SQLITE_OK;
546}
547
548
549/*
550** Attempt to open a new file for exclusive access by this process.
551** The file will be opened for both reading and writing. To avoid
552** a potential security problem, we do not allow the file to have
553** previously existed. Nor do we allow the file to be a symbolic
554** link.
555**
556** If delFlag is true, then make arrangements to automatically delete
557** the file when it is closed.
558**
559** On success, write the file handle into *id and return SQLITE_OK.
560**
561** On failure, return SQLITE_CANTOPEN.
562*/
563int sqlite3OsOpenExclusive(const char *zFilename, OsFile *id, int delFlag){
564 int rc;
drhda71ce12004-06-21 18:14:45 +0000565 assert( !id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000566 if( access(zFilename, 0)==0 ){
567 return SQLITE_CANTOPEN;
568 }
drh2b4b5962005-06-15 17:47:55 +0000569 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000570 id->dirfd = -1;
drha6abd042004-06-09 17:37:22 +0000571 id->h = open(zFilename,
drhd6459672005-08-13 17:17:01 +0000572 O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY,
573 SQLITE_DEFAULT_FILE_PERMISSIONS);
drha6abd042004-06-09 17:37:22 +0000574 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000575 return SQLITE_CANTOPEN;
576 }
577 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000578 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000579 sqlite3OsLeaveMutex();
580 if( rc ){
drha6abd042004-06-09 17:37:22 +0000581 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000582 unlink(zFilename);
583 return SQLITE_NOMEM;
584 }
danielk197713adf8a2004-06-03 16:08:41 +0000585 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000586 id->isOpen = 1;
drhbbd42a62004-05-22 17:41:58 +0000587 if( delFlag ){
588 unlink(zFilename);
589 }
drha6abd042004-06-09 17:37:22 +0000590 TRACE3("OPEN-EX %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000591 OpenCounter(+1);
592 return SQLITE_OK;
593}
594
595/*
596** Attempt to open a new file for read-only access.
597**
598** On success, write the file handle into *id and return SQLITE_OK.
599**
600** On failure, return SQLITE_CANTOPEN.
601*/
602int sqlite3OsOpenReadOnly(const char *zFilename, OsFile *id){
603 int rc;
drhda71ce12004-06-21 18:14:45 +0000604 assert( !id->isOpen );
drh2b4b5962005-06-15 17:47:55 +0000605 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000606 id->dirfd = -1;
drha6abd042004-06-09 17:37:22 +0000607 id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
608 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000609 return SQLITE_CANTOPEN;
610 }
611 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000612 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000613 sqlite3OsLeaveMutex();
614 if( rc ){
drha6abd042004-06-09 17:37:22 +0000615 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000616 return SQLITE_NOMEM;
617 }
danielk197713adf8a2004-06-03 16:08:41 +0000618 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000619 id->isOpen = 1;
drha6abd042004-06-09 17:37:22 +0000620 TRACE3("OPEN-RO %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000621 OpenCounter(+1);
622 return SQLITE_OK;
623}
624
625/*
626** Attempt to open a file descriptor for the directory that contains a
627** file. This file descriptor can be used to fsync() the directory
628** in order to make sure the creation of a new file is actually written
629** to disk.
630**
631** This routine is only meaningful for Unix. It is a no-op under
632** windows since windows does not support hard links.
633**
634** On success, a handle for a previously open file is at *id is
635** updated with the new directory file descriptor and SQLITE_OK is
636** returned.
637**
638** On failure, the function returns SQLITE_CANTOPEN and leaves
639** *id unchanged.
640*/
641int sqlite3OsOpenDirectory(
642 const char *zDirname,
643 OsFile *id
644){
drhda71ce12004-06-21 18:14:45 +0000645 if( !id->isOpen ){
drhbbd42a62004-05-22 17:41:58 +0000646 /* Do not open the directory if the corresponding file is not already
647 ** open. */
648 return SQLITE_CANTOPEN;
649 }
drh2b4b5962005-06-15 17:47:55 +0000650 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000651 assert( id->dirfd<0 );
drh8e855772005-05-17 11:25:31 +0000652 id->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0);
drhbbd42a62004-05-22 17:41:58 +0000653 if( id->dirfd<0 ){
654 return SQLITE_CANTOPEN;
655 }
656 TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
657 return SQLITE_OK;
658}
659
/*
** If this global variable is set to point at a string holding the name
** of a directory, that directory is tried first when choosing where to
** store temporary files.
*/
char *sqlite3_temp_directory = 0;
drhab3f9fe2004-08-14 17:10:10 +0000666
667/*
drhbbd42a62004-05-22 17:41:58 +0000668** Create a temporary file name in zBuf. zBuf must be big enough to
669** hold at least SQLITE_TEMPNAME_SIZE characters.
670*/
671int sqlite3OsTempFileName(char *zBuf){
672 static const char *azDirs[] = {
drhab3f9fe2004-08-14 17:10:10 +0000673 0,
drhbbd42a62004-05-22 17:41:58 +0000674 "/var/tmp",
675 "/usr/tmp",
676 "/tmp",
677 ".",
678 };
drh57196282004-10-06 15:41:16 +0000679 static const unsigned char zChars[] =
drhbbd42a62004-05-22 17:41:58 +0000680 "abcdefghijklmnopqrstuvwxyz"
681 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
682 "0123456789";
683 int i, j;
684 struct stat buf;
685 const char *zDir = ".";
drheffd02b2004-08-29 23:42:13 +0000686 azDirs[0] = sqlite3_temp_directory;
drhbbd42a62004-05-22 17:41:58 +0000687 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
drhab3f9fe2004-08-14 17:10:10 +0000688 if( azDirs[i]==0 ) continue;
drhbbd42a62004-05-22 17:41:58 +0000689 if( stat(azDirs[i], &buf) ) continue;
690 if( !S_ISDIR(buf.st_mode) ) continue;
691 if( access(azDirs[i], 07) ) continue;
692 zDir = azDirs[i];
693 break;
694 }
695 do{
696 sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
697 j = strlen(zBuf);
698 sqlite3Randomness(15, &zBuf[j]);
699 for(i=0; i<15; i++, j++){
700 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
701 }
702 zBuf[j] = 0;
703 }while( access(zBuf,0)==0 );
704 return SQLITE_OK;
705}
706
drh268283b2005-01-08 15:44:25 +0000707#ifndef SQLITE_OMIT_PAGER_PRAGMAS
/*
** Return 1 if zBuf names an existing directory that is accessible with
** mode 07 according to access(), and 0 otherwise.  A NULL pointer or
** an empty string yields 0.
*/
int sqlite3OsIsDirWritable(char *zBuf){
  struct stat buf;
  if( zBuf==0 || zBuf[0]==0 ) return 0;
  if( stat(zBuf, &buf)!=0 ) return 0;
  return S_ISDIR(buf.st_mode) && access(zBuf, 07)==0;
}
drh268283b2005-01-08 15:44:25 +0000721#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
tpoindex9a09a3c2004-12-20 19:01:32 +0000722
723/*
drhbbd42a62004-05-22 17:41:58 +0000724** Read data from a file into a buffer. Return SQLITE_OK if all
725** bytes were read successfully and SQLITE_IOERR if anything goes
726** wrong.
727*/
728int sqlite3OsRead(OsFile *id, void *pBuf, int amt){
729 int got;
drhda71ce12004-06-21 18:14:45 +0000730 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000731 SimulateIOError(SQLITE_IOERR);
732 TIMER_START;
drha6abd042004-06-09 17:37:22 +0000733 got = read(id->h, pBuf, amt);
drhbbd42a62004-05-22 17:41:58 +0000734 TIMER_END;
drhe29b9152005-03-18 14:03:15 +0000735 TRACE5("READ %-3d %5d %7d %d\n", id->h, got, last_page, TIMER_ELAPSED);
drhbbd42a62004-05-22 17:41:58 +0000736 SEEK(0);
737 /* if( got<0 ) got = 0; */
738 if( got==amt ){
739 return SQLITE_OK;
740 }else{
741 return SQLITE_IOERR;
742 }
743}
744
745/*
746** Write data from a buffer into a file. Return SQLITE_OK on success
747** or some other error code on failure.
748*/
749int sqlite3OsWrite(OsFile *id, const void *pBuf, int amt){
750 int wrote = 0;
drhda71ce12004-06-21 18:14:45 +0000751 assert( id->isOpen );
drh4c7f9412005-02-03 00:29:47 +0000752 assert( amt>0 );
drhbbd42a62004-05-22 17:41:58 +0000753 SimulateIOError(SQLITE_IOERR);
drh047d4832004-10-01 14:38:02 +0000754 SimulateDiskfullError;
drhbbd42a62004-05-22 17:41:58 +0000755 TIMER_START;
drha6abd042004-06-09 17:37:22 +0000756 while( amt>0 && (wrote = write(id->h, pBuf, amt))>0 ){
drhbbd42a62004-05-22 17:41:58 +0000757 amt -= wrote;
758 pBuf = &((char*)pBuf)[wrote];
759 }
760 TIMER_END;
drhe29b9152005-03-18 14:03:15 +0000761 TRACE5("WRITE %-3d %5d %7d %d\n", id->h, wrote, last_page, TIMER_ELAPSED);
drhbbd42a62004-05-22 17:41:58 +0000762 SEEK(0);
763 if( amt>0 ){
764 return SQLITE_FULL;
765 }
766 return SQLITE_OK;
767}
768
769/*
770** Move the read/write pointer in a file.
771*/
drheb206252004-10-01 02:00:31 +0000772int sqlite3OsSeek(OsFile *id, i64 offset){
drhda71ce12004-06-21 18:14:45 +0000773 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000774 SEEK(offset/1024 + 1);
drhb4746b92005-09-09 01:32:06 +0000775#ifdef SQLITE_TEST
776 if( offset ) SimulateDiskfullError
777#endif
drha6abd042004-06-09 17:37:22 +0000778 lseek(id->h, offset, SEEK_SET);
drhbbd42a62004-05-22 17:41:58 +0000779 return SQLITE_OK;
780}
781
#if defined(SQLITE_TEST)
/*
** Counters for the number of syncs and the number of fullsyncs that
** have been requested.  Used by the test suite to verify that syncs
** and fullsyncs are occurring at the right times.
*/
int sqlite3_sync_count = 0;
int sqlite3_fullsync_count = 0;
#endif
790
791
/*
** The fsync() system call does not work as advertised on many
** unix systems.  The following procedure is an attempt to make
** it work better.
**
** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful
** for testing when we want to run through the test suite quickly.
** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
** or power failure will likely corrupt the database file.
**
** Behavior, depending on the build:
**
**   SQLITE_NO_SYNC defined:  nothing is synced; SQLITE_OK is returned.
**   F_FULLFSYNC available:   if fullSync is true, fcntl(F_FULLFSYNC) is
**                            tried first; fsync() is used when fullSync
**                            is false or the fcntl() fails.  dataOnly
**                            is ignored.
**   otherwise:               fdatasync() if dataOnly is true, else
**                            fsync().  fullSync is ignored.
**
** The return value is zero on success and non-zero on failure.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

  /* In test builds, count how often a normal sync and a FULLSYNC are
  ** requested so that the test suite can verify that this procedure
  ** gets called with the correct arguments.
  */
#ifdef SQLITE_TEST
  if( fullSync ) sqlite3_fullsync_count++;
  sqlite3_sync_count++;
#endif

#if defined(SQLITE_NO_SYNC)
  /* Syncing has been compiled out: report success without doing it. */
  rc = SQLITE_OK;
#elif defined(F_FULLFSYNC)
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
  /* No FULLSYNC requested, or it failed: do a normal fsync() */
  if( rc!=0 ) rc = fsync(fd);
#else
  rc = dataOnly ? fdatasync(fd) : fsync(fd);
#endif

  return rc;
}
842
843/*
drhbbd42a62004-05-22 17:41:58 +0000844** Make sure all writes to a particular file are committed to disk.
845**
drheb796a72005-09-08 12:38:41 +0000846** If dataOnly==0 then both the file itself and its metadata (file
847** size, access time, etc) are synced. If dataOnly!=0 then only the
848** file data is synced.
849**
drhbbd42a62004-05-22 17:41:58 +0000850** Under Unix, also make sure that the directory entry for the file
851** has been created by fsync-ing the directory that contains the file.
852** If we do not do this and we encounter a power failure, the directory
853** entry for the journal might not exist after we reboot. The next
854** SQLite to access the file will not know that the journal exists (because
855** the directory entry for the journal was never created) and the transaction
856** will not roll back - possibly leading to database corruption.
857*/
drheb796a72005-09-08 12:38:41 +0000858int sqlite3OsSync(OsFile *id, int dataOnly){
drhda71ce12004-06-21 18:14:45 +0000859 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000860 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000861 TRACE2("SYNC %-3d\n", id->h);
drheb796a72005-09-08 12:38:41 +0000862 if( full_fsync(id->h, id->fullSync, dataOnly) ){
drhbbd42a62004-05-22 17:41:58 +0000863 return SQLITE_IOERR;
drhbbd42a62004-05-22 17:41:58 +0000864 }
drha2854222004-06-17 19:04:17 +0000865 if( id->dirfd>=0 ){
866 TRACE2("DIRSYNC %-3d\n", id->dirfd);
drheb796a72005-09-08 12:38:41 +0000867 full_fsync(id->dirfd, id->fullSync, 0);
drha2854222004-06-17 19:04:17 +0000868 close(id->dirfd); /* Only need to sync once, so close the directory */
869 id->dirfd = -1; /* when we are done. */
870 }
drha2854222004-06-17 19:04:17 +0000871 return SQLITE_OK;
drhbbd42a62004-05-22 17:41:58 +0000872}
873
874/*
danielk1977962398d2004-06-14 09:35:16 +0000875** Sync the directory zDirname. This is a no-op on operating systems other
876** than UNIX.
drhb851b2c2005-03-10 14:11:12 +0000877**
878** This is used to make sure the master journal file has truely been deleted
879** before making changes to individual journals on a multi-database commit.
drhf30cc942005-03-11 17:52:34 +0000880** The F_FULLFSYNC option is not needed here.
danielk1977962398d2004-06-14 09:35:16 +0000881*/
882int sqlite3OsSyncDirectory(const char *zDirname){
883 int fd;
884 int r;
danielk1977369f27e2004-06-15 11:40:04 +0000885 SimulateIOError(SQLITE_IOERR);
drh8e855772005-05-17 11:25:31 +0000886 fd = open(zDirname, O_RDONLY|O_BINARY, 0);
danielk1977369f27e2004-06-15 11:40:04 +0000887 TRACE3("DIRSYNC %-3d (%s)\n", fd, zDirname);
danielk1977962398d2004-06-14 09:35:16 +0000888 if( fd<0 ){
889 return SQLITE_CANTOPEN;
890 }
891 r = fsync(fd);
892 close(fd);
893 return ((r==0)?SQLITE_OK:SQLITE_IOERR);
894}
895
896/*
drhbbd42a62004-05-22 17:41:58 +0000897** Truncate an open file to a specified size
898*/
drheb206252004-10-01 02:00:31 +0000899int sqlite3OsTruncate(OsFile *id, i64 nByte){
drhda71ce12004-06-21 18:14:45 +0000900 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000901 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000902 return ftruncate(id->h, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
drhbbd42a62004-05-22 17:41:58 +0000903}
904
905/*
906** Determine the current size of a file in bytes
907*/
drheb206252004-10-01 02:00:31 +0000908int sqlite3OsFileSize(OsFile *id, i64 *pSize){
drhbbd42a62004-05-22 17:41:58 +0000909 struct stat buf;
drhda71ce12004-06-21 18:14:45 +0000910 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000911 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000912 if( fstat(id->h, &buf)!=0 ){
drhbbd42a62004-05-22 17:41:58 +0000913 return SQLITE_IOERR;
914 }
915 *pSize = buf.st_size;
916 return SQLITE_OK;
917}
918
danielk19779a1d0ab2004-06-01 14:09:28 +0000919/*
danielk197713adf8a2004-06-03 16:08:41 +0000920** This routine checks if there is a RESERVED lock held on the specified
921** file by this or any other process. If such a lock is held, return
drh2ac3ee92004-06-07 16:27:46 +0000922** non-zero. If the file is unlocked or holds only SHARED locks, then
923** return zero.
danielk197713adf8a2004-06-03 16:08:41 +0000924*/
drha6abd042004-06-09 17:37:22 +0000925int sqlite3OsCheckReservedLock(OsFile *id){
danielk197713adf8a2004-06-03 16:08:41 +0000926 int r = 0;
927
drhda71ce12004-06-21 18:14:45 +0000928 assert( id->isOpen );
drh2b4b5962005-06-15 17:47:55 +0000929 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh2ac3ee92004-06-07 16:27:46 +0000930 sqlite3OsEnterMutex(); /* Needed because id->pLock is shared across threads */
danielk197713adf8a2004-06-03 16:08:41 +0000931
932 /* Check if a thread in this process holds such a lock */
933 if( id->pLock->locktype>SHARED_LOCK ){
934 r = 1;
935 }
936
drh2ac3ee92004-06-07 16:27:46 +0000937 /* Otherwise see if some other process holds it.
danielk197713adf8a2004-06-03 16:08:41 +0000938 */
939 if( !r ){
940 struct flock lock;
941 lock.l_whence = SEEK_SET;
drh2ac3ee92004-06-07 16:27:46 +0000942 lock.l_start = RESERVED_BYTE;
943 lock.l_len = 1;
944 lock.l_type = F_WRLCK;
drha6abd042004-06-09 17:37:22 +0000945 fcntl(id->h, F_GETLK, &lock);
danielk197713adf8a2004-06-03 16:08:41 +0000946 if( lock.l_type!=F_UNLCK ){
947 r = 1;
948 }
949 }
950
951 sqlite3OsLeaveMutex();
drha6abd042004-06-09 17:37:22 +0000952 TRACE3("TEST WR-LOCK %d %d\n", id->h, r);
danielk197713adf8a2004-06-03 16:08:41 +0000953
954 return r;
955}
956
#ifdef SQLITE_DEBUG
/*
** Helper function for printing out trace information from debugging
** binaries.  Returns the string representation of the supplied integer
** lock-type, or "ERROR" if the value is not one of the known lock types.
*/
static const char * locktypeName(int locktype){
  if( locktype==NO_LOCK )        return "NONE";
  if( locktype==SHARED_LOCK )    return "SHARED";
  if( locktype==RESERVED_LOCK )  return "RESERVED";
  if( locktype==PENDING_LOCK )   return "PENDING";
  if( locktype==EXCLUSIVE_LOCK ) return "EXCLUSIVE";
  return "ERROR";
}
#endif
974
danielk197713adf8a2004-06-03 16:08:41 +0000975/*
danielk19779a1d0ab2004-06-01 14:09:28 +0000976** Lock the file with the lock specified by parameter locktype - one
977** of the following:
978**
drh2ac3ee92004-06-07 16:27:46 +0000979** (1) SHARED_LOCK
980** (2) RESERVED_LOCK
981** (3) PENDING_LOCK
982** (4) EXCLUSIVE_LOCK
983**
drhb3e04342004-06-08 00:47:47 +0000984** Sometimes when requesting one lock state, additional lock states
985** are inserted in between. The locking might fail on one of the later
986** transitions leaving the lock state different from what it started but
987** still short of its goal. The following chart shows the allowed
988** transitions and the inserted intermediate states:
989**
990** UNLOCKED -> SHARED
991** SHARED -> RESERVED
992** SHARED -> (PENDING) -> EXCLUSIVE
993** RESERVED -> (PENDING) -> EXCLUSIVE
994** PENDING -> EXCLUSIVE
drh2ac3ee92004-06-07 16:27:46 +0000995**
drha6abd042004-06-09 17:37:22 +0000996** This routine will only increase a lock. Use the sqlite3OsUnlock()
997** routine to lower a locking level.
danielk19779a1d0ab2004-06-01 14:09:28 +0000998*/
999int sqlite3OsLock(OsFile *id, int locktype){
danielk1977f42f25c2004-06-25 07:21:28 +00001000 /* The following describes the implementation of the various locks and
1001 ** lock transitions in terms of the POSIX advisory shared and exclusive
1002 ** lock primitives (called read-locks and write-locks below, to avoid
1003 ** confusion with SQLite lock names). The algorithms are complicated
1004 ** slightly in order to be compatible with windows systems simultaneously
1005 ** accessing the same database file, in case that is ever required.
1006 **
1007 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
1008 ** byte', each single bytes at well known offsets, and the 'shared byte
1009 ** range', a range of 510 bytes at a well known offset.
1010 **
1011 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
1012 ** byte'. If this is successful, a random byte from the 'shared byte
1013 ** range' is read-locked and the lock on the 'pending byte' released.
1014 **
danielk197790ba3bd2004-06-25 08:32:25 +00001015 ** A process may only obtain a RESERVED lock after it has a SHARED lock.
1016 ** A RESERVED lock is implemented by grabbing a write-lock on the
1017 ** 'reserved byte'.
danielk1977f42f25c2004-06-25 07:21:28 +00001018 **
1019 ** A process may only obtain a PENDING lock after it has obtained a
danielk197790ba3bd2004-06-25 08:32:25 +00001020 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
1021 ** on the 'pending byte'. This ensures that no new SHARED locks can be
1022 ** obtained, but existing SHARED locks are allowed to persist. A process
1023 ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
1024 ** This property is used by the algorithm for rolling back a journal file
1025 ** after a crash.
danielk1977f42f25c2004-06-25 07:21:28 +00001026 **
danielk197790ba3bd2004-06-25 08:32:25 +00001027 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
1028 ** implemented by obtaining a write-lock on the entire 'shared byte
1029 ** range'. Since all other locks require a read-lock on one of the bytes
1030 ** within this range, this ensures that no other locks are held on the
1031 ** database.
danielk1977f42f25c2004-06-25 07:21:28 +00001032 **
1033 ** The reason a single byte cannot be used instead of the 'shared byte
1034 ** range' is that some versions of windows do not support read-locks. By
1035 ** locking a random byte from a range, concurrent SHARED locks may exist
1036 ** even if the locking primitive used is always a write-lock.
1037 */
danielk19779a1d0ab2004-06-01 14:09:28 +00001038 int rc = SQLITE_OK;
1039 struct lockInfo *pLock = id->pLock;
1040 struct flock lock;
1041 int s;
1042
drhda71ce12004-06-21 18:14:45 +00001043 assert( id->isOpen );
drhe29b9152005-03-18 14:03:15 +00001044 TRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", id->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001045 locktypeName(id->locktype), locktypeName(pLock->locktype), pLock->cnt
1046 ,getpid() );
drh2b4b5962005-06-15 17:47:55 +00001047 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001048
1049 /* If there is already a lock of this type or more restrictive on the
1050 ** OsFile, do nothing. Don't use the end_lock: exit path, as
1051 ** sqlite3OsEnterMutex() hasn't been called yet.
1052 */
danielk197713adf8a2004-06-03 16:08:41 +00001053 if( id->locktype>=locktype ){
drhe29b9152005-03-18 14:03:15 +00001054 TRACE3("LOCK %d %s ok (already held)\n", id->h, locktypeName(locktype));
danielk19779a1d0ab2004-06-01 14:09:28 +00001055 return SQLITE_OK;
1056 }
1057
drhb3e04342004-06-08 00:47:47 +00001058 /* Make sure the locking sequence is correct
drh2ac3ee92004-06-07 16:27:46 +00001059 */
drhb3e04342004-06-08 00:47:47 +00001060 assert( id->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1061 assert( locktype!=PENDING_LOCK );
1062 assert( locktype!=RESERVED_LOCK || id->locktype==SHARED_LOCK );
drh2ac3ee92004-06-07 16:27:46 +00001063
drhb3e04342004-06-08 00:47:47 +00001064 /* This mutex is needed because id->pLock is shared across threads
1065 */
1066 sqlite3OsEnterMutex();
danielk19779a1d0ab2004-06-01 14:09:28 +00001067
1068 /* If some thread using this PID has a lock via a different OsFile*
1069 ** handle that precludes the requested lock, return BUSY.
1070 */
danielk197713adf8a2004-06-03 16:08:41 +00001071 if( (id->locktype!=pLock->locktype &&
drh2ac3ee92004-06-07 16:27:46 +00001072 (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
danielk19779a1d0ab2004-06-01 14:09:28 +00001073 ){
1074 rc = SQLITE_BUSY;
1075 goto end_lock;
1076 }
1077
1078 /* If a SHARED lock is requested, and some thread using this PID already
1079 ** has a SHARED or RESERVED lock, then increment reference counts and
1080 ** return SQLITE_OK.
1081 */
1082 if( locktype==SHARED_LOCK &&
1083 (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
1084 assert( locktype==SHARED_LOCK );
danielk197713adf8a2004-06-03 16:08:41 +00001085 assert( id->locktype==0 );
danielk1977ecb2a962004-06-02 06:30:16 +00001086 assert( pLock->cnt>0 );
danielk197713adf8a2004-06-03 16:08:41 +00001087 id->locktype = SHARED_LOCK;
danielk19779a1d0ab2004-06-01 14:09:28 +00001088 pLock->cnt++;
1089 id->pOpen->nLock++;
1090 goto end_lock;
1091 }
1092
danielk197713adf8a2004-06-03 16:08:41 +00001093 lock.l_len = 1L;
drh2b4b5962005-06-15 17:47:55 +00001094
danielk19779a1d0ab2004-06-01 14:09:28 +00001095 lock.l_whence = SEEK_SET;
1096
drh3cde3bb2004-06-12 02:17:14 +00001097 /* A PENDING lock is needed before acquiring a SHARED lock and before
1098 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1099 ** be released.
danielk19779a1d0ab2004-06-01 14:09:28 +00001100 */
drh3cde3bb2004-06-12 02:17:14 +00001101 if( locktype==SHARED_LOCK
1102 || (locktype==EXCLUSIVE_LOCK && id->locktype<PENDING_LOCK)
1103 ){
danielk1977489468c2004-06-28 08:25:47 +00001104 lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
drh2ac3ee92004-06-07 16:27:46 +00001105 lock.l_start = PENDING_BYTE;
drha6abd042004-06-09 17:37:22 +00001106 s = fcntl(id->h, F_SETLK, &lock);
danielk19779a1d0ab2004-06-01 14:09:28 +00001107 if( s ){
1108 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1109 goto end_lock;
1110 }
drh3cde3bb2004-06-12 02:17:14 +00001111 }
1112
1113
1114 /* If control gets to this point, then actually go ahead and make
1115 ** operating system calls for the specified lock.
1116 */
1117 if( locktype==SHARED_LOCK ){
1118 assert( pLock->cnt==0 );
1119 assert( pLock->locktype==0 );
danielk19779a1d0ab2004-06-01 14:09:28 +00001120
drh2ac3ee92004-06-07 16:27:46 +00001121 /* Now get the read-lock */
1122 lock.l_start = SHARED_FIRST;
1123 lock.l_len = SHARED_SIZE;
drha6abd042004-06-09 17:37:22 +00001124 s = fcntl(id->h, F_SETLK, &lock);
drh2ac3ee92004-06-07 16:27:46 +00001125
1126 /* Drop the temporary PENDING lock */
1127 lock.l_start = PENDING_BYTE;
1128 lock.l_len = 1L;
danielk19779a1d0ab2004-06-01 14:09:28 +00001129 lock.l_type = F_UNLCK;
drh2b4b5962005-06-15 17:47:55 +00001130 if( fcntl(id->h, F_SETLK, &lock)!=0 ){
1131 rc = SQLITE_IOERR; /* This should never happen */
1132 goto end_lock;
1133 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001134 if( s ){
drhbbd42a62004-05-22 17:41:58 +00001135 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1136 }else{
danielk197713adf8a2004-06-03 16:08:41 +00001137 id->locktype = SHARED_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001138 id->pOpen->nLock++;
danielk19779a1d0ab2004-06-01 14:09:28 +00001139 pLock->cnt = 1;
drhbbd42a62004-05-22 17:41:58 +00001140 }
drh3cde3bb2004-06-12 02:17:14 +00001141 }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
1142 /* We are trying for an exclusive lock but another thread in this
1143 ** same process is still holding a shared lock. */
1144 rc = SQLITE_BUSY;
drhbbd42a62004-05-22 17:41:58 +00001145 }else{
drh3cde3bb2004-06-12 02:17:14 +00001146 /* The request was for a RESERVED or EXCLUSIVE lock. It is
danielk19779a1d0ab2004-06-01 14:09:28 +00001147 ** assumed that there is a SHARED or greater lock on the file
1148 ** already.
1149 */
danielk197713adf8a2004-06-03 16:08:41 +00001150 assert( 0!=id->locktype );
danielk19779a1d0ab2004-06-01 14:09:28 +00001151 lock.l_type = F_WRLCK;
1152 switch( locktype ){
1153 case RESERVED_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001154 lock.l_start = RESERVED_BYTE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001155 break;
danielk19779a1d0ab2004-06-01 14:09:28 +00001156 case EXCLUSIVE_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001157 lock.l_start = SHARED_FIRST;
1158 lock.l_len = SHARED_SIZE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001159 break;
1160 default:
1161 assert(0);
1162 }
drha6abd042004-06-09 17:37:22 +00001163 s = fcntl(id->h, F_SETLK, &lock);
danielk19779a1d0ab2004-06-01 14:09:28 +00001164 if( s ){
1165 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1166 }
drhbbd42a62004-05-22 17:41:58 +00001167 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001168
danielk1977ecb2a962004-06-02 06:30:16 +00001169 if( rc==SQLITE_OK ){
danielk197713adf8a2004-06-03 16:08:41 +00001170 id->locktype = locktype;
danielk1977ecb2a962004-06-02 06:30:16 +00001171 pLock->locktype = locktype;
drh3cde3bb2004-06-12 02:17:14 +00001172 }else if( locktype==EXCLUSIVE_LOCK ){
1173 id->locktype = PENDING_LOCK;
1174 pLock->locktype = PENDING_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001175 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001176
1177end_lock:
drhbbd42a62004-05-22 17:41:58 +00001178 sqlite3OsLeaveMutex();
drhe29b9152005-03-18 14:03:15 +00001179 TRACE4("LOCK %d %s %s\n", id->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001180 rc==SQLITE_OK ? "ok" : "failed");
drhbbd42a62004-05-22 17:41:58 +00001181 return rc;
1182}
1183
1184/*
drha6abd042004-06-09 17:37:22 +00001185** Lower the locking level on file descriptor id to locktype. locktype
1186** must be either NO_LOCK or SHARED_LOCK.
1187**
1188** If the locking level of the file descriptor is already at or below
1189** the requested locking level, this routine is a no-op.
1190**
drh9c105bb2004-10-02 20:38:28 +00001191** It is not possible for this routine to fail if the second argument
1192** is NO_LOCK. If the second argument is SHARED_LOCK, this routine
1193** might return SQLITE_IOERR instead of SQLITE_OK.
drhbbd42a62004-05-22 17:41:58 +00001194*/
drha6abd042004-06-09 17:37:22 +00001195int sqlite3OsUnlock(OsFile *id, int locktype){
1196 struct lockInfo *pLock;
1197 struct flock lock;
drh9c105bb2004-10-02 20:38:28 +00001198 int rc = SQLITE_OK;
drha6abd042004-06-09 17:37:22 +00001199
drhda71ce12004-06-21 18:14:45 +00001200 assert( id->isOpen );
drhe29b9152005-03-18 14:03:15 +00001201 TRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", id->h, locktype, id->locktype,
danielk19772b444852004-06-29 07:45:33 +00001202 id->pLock->locktype, id->pLock->cnt, getpid());
drh2b4b5962005-06-15 17:47:55 +00001203 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drha6abd042004-06-09 17:37:22 +00001204
1205 assert( locktype<=SHARED_LOCK );
1206 if( id->locktype<=locktype ){
1207 return SQLITE_OK;
1208 }
drhbbd42a62004-05-22 17:41:58 +00001209 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +00001210 pLock = id->pLock;
1211 assert( pLock->cnt!=0 );
1212 if( id->locktype>SHARED_LOCK ){
1213 assert( pLock->locktype==id->locktype );
drh9c105bb2004-10-02 20:38:28 +00001214 if( locktype==SHARED_LOCK ){
1215 lock.l_type = F_RDLCK;
1216 lock.l_whence = SEEK_SET;
1217 lock.l_start = SHARED_FIRST;
1218 lock.l_len = SHARED_SIZE;
1219 if( fcntl(id->h, F_SETLK, &lock)!=0 ){
1220 /* This should never happen */
1221 rc = SQLITE_IOERR;
1222 }
1223 }
drhbbd42a62004-05-22 17:41:58 +00001224 lock.l_type = F_UNLCK;
1225 lock.l_whence = SEEK_SET;
drha6abd042004-06-09 17:37:22 +00001226 lock.l_start = PENDING_BYTE;
1227 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
drh2b4b5962005-06-15 17:47:55 +00001228 if( fcntl(id->h, F_SETLK, &lock)==0 ){
1229 pLock->locktype = SHARED_LOCK;
1230 }else{
1231 rc = SQLITE_IOERR; /* This should never happen */
1232 }
drhbbd42a62004-05-22 17:41:58 +00001233 }
drha6abd042004-06-09 17:37:22 +00001234 if( locktype==NO_LOCK ){
1235 struct openCnt *pOpen;
danielk1977ecb2a962004-06-02 06:30:16 +00001236
drha6abd042004-06-09 17:37:22 +00001237 /* Decrement the shared lock counter. Release the lock using an
1238 ** OS call only when all threads in this same process have released
1239 ** the lock.
1240 */
1241 pLock->cnt--;
1242 if( pLock->cnt==0 ){
1243 lock.l_type = F_UNLCK;
1244 lock.l_whence = SEEK_SET;
1245 lock.l_start = lock.l_len = 0L;
drh2b4b5962005-06-15 17:47:55 +00001246 if( fcntl(id->h, F_SETLK, &lock)==0 ){
1247 pLock->locktype = NO_LOCK;
1248 }else{
1249 rc = SQLITE_IOERR; /* This should never happen */
1250 }
drha6abd042004-06-09 17:37:22 +00001251 }
1252
drhbbd42a62004-05-22 17:41:58 +00001253 /* Decrement the count of locks against this same file. When the
1254 ** count reaches zero, close any other file descriptors whose close
1255 ** was deferred because of outstanding locks.
1256 */
drha6abd042004-06-09 17:37:22 +00001257 pOpen = id->pOpen;
drhbbd42a62004-05-22 17:41:58 +00001258 pOpen->nLock--;
1259 assert( pOpen->nLock>=0 );
1260 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1261 int i;
1262 for(i=0; i<pOpen->nPending; i++){
1263 close(pOpen->aPending[i]);
1264 }
1265 sqliteFree(pOpen->aPending);
1266 pOpen->nPending = 0;
1267 pOpen->aPending = 0;
1268 }
1269 }
1270 sqlite3OsLeaveMutex();
drha6abd042004-06-09 17:37:22 +00001271 id->locktype = locktype;
drh9c105bb2004-10-02 20:38:28 +00001272 return rc;
drhbbd42a62004-05-22 17:41:58 +00001273}
1274
1275/*
danielk1977e3026632004-06-22 11:29:02 +00001276** Close a file.
1277*/
1278int sqlite3OsClose(OsFile *id){
1279 if( !id->isOpen ) return SQLITE_OK;
drh2b4b5962005-06-15 17:47:55 +00001280 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk1977e3026632004-06-22 11:29:02 +00001281 sqlite3OsUnlock(id, NO_LOCK);
1282 if( id->dirfd>=0 ) close(id->dirfd);
1283 id->dirfd = -1;
1284 sqlite3OsEnterMutex();
1285 if( id->pOpen->nLock ){
1286 /* If there are outstanding locks, do not actually close the file just
1287 ** yet because that would clear those locks. Instead, add the file
1288 ** descriptor to pOpen->aPending. It will be automatically closed when
1289 ** the last lock is cleared.
1290 */
1291 int *aNew;
1292 struct openCnt *pOpen = id->pOpen;
drhad81e872005-08-21 21:45:01 +00001293 aNew = sqliteRealloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
danielk1977e3026632004-06-22 11:29:02 +00001294 if( aNew==0 ){
1295 /* If a malloc fails, just leak the file descriptor */
1296 }else{
1297 pOpen->aPending = aNew;
drhad81e872005-08-21 21:45:01 +00001298 pOpen->aPending[pOpen->nPending] = id->h;
1299 pOpen->nPending++;
danielk1977e3026632004-06-22 11:29:02 +00001300 }
1301 }else{
1302 /* There are no outstanding locks so we can close the file immediately */
1303 close(id->h);
1304 }
1305 releaseLockInfo(id->pLock);
1306 releaseOpenCnt(id->pOpen);
1307 sqlite3OsLeaveMutex();
1308 id->isOpen = 0;
1309 TRACE2("CLOSE %-3d\n", id->h);
1310 OpenCounter(-1);
1311 return SQLITE_OK;
1312}
1313
/*
** Turn a relative pathname into a full pathname.  Return a pointer
** to the full pathname stored in space obtained from sqliteMalloc().
** The calling function is responsible for freeing this space once it
** is no longer needed.
**
** A name that already starts with '/' is simply copied.  Any other
** name is appended to the current working directory.
**
** Returns 0 if the scratch buffer cannot be allocated or if getcwd()
** is unable to report the working directory (for example because it
** is longer than the scratch buffer).
*/
char *sqlite3OsFullPathname(const char *zRelative){
  const int nCwdBuf = 5000;   /* Size of the scratch buffer for getcwd() */
  char *zFull = 0;
  if( zRelative[0]=='/' ){
    sqlite3SetString(&zFull, zRelative, (char*)0);
  }else{
    char *zCwd;
    char *zBuf = sqliteMalloc(nCwdBuf);
    if( zBuf==0 ){
      return 0;
    }
    zBuf[0] = 0;
    zCwd = getcwd(zBuf, nCwdBuf);
    /* getcwd() returns NULL on failure.  The argument list handed to
    ** sqlite3SetString() is terminated by a null pointer, so a NULL
    ** here must never be passed on as the first path component.
    */
    if( zCwd!=0 ){
      sqlite3SetString(&zFull, zCwd, "/", zRelative, (char*)0);
    }
    sqliteFree(zBuf);
  }
  return zFull;
}
1336
1337
1338#endif /* SQLITE_OMIT_DISKIO */
/***************************************************************************
** Everything above deals with file I/O.  Everything that follows deals
** with other miscellaneous aspects of the operating system interface
****************************************************************************/
1343
1344
1345/*
drhbbd42a62004-05-22 17:41:58 +00001346** Get information to seed the random number generator. The seed
1347** is written into the buffer zBuf[256]. The calling function must
1348** supply a sufficiently large buffer.
1349*/
1350int sqlite3OsRandomSeed(char *zBuf){
1351 /* We have to initialize zBuf to prevent valgrind from reporting
1352 ** errors. The reports issued by valgrind are incorrect - we would
1353 ** prefer that the randomness be increased by making use of the
1354 ** uninitialized space in zBuf - but valgrind errors tend to worry
1355 ** some users. Rather than argue, it seems easier just to initialize
1356 ** the whole array and silence valgrind, even if that means less randomness
1357 ** in the random seed.
1358 **
1359 ** When testing, initializing zBuf[] to zero is all we do. That means
1360 ** that we always use the same random number sequence.* This makes the
1361 ** tests repeatable.
1362 */
1363 memset(zBuf, 0, 256);
1364#if !defined(SQLITE_TEST)
1365 {
drh842b8642005-01-21 17:53:17 +00001366 int pid, fd;
1367 fd = open("/dev/urandom", O_RDONLY);
1368 if( fd<0 ){
1369 time((time_t*)zBuf);
1370 pid = getpid();
1371 memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
1372 }else{
1373 read(fd, zBuf, 256);
1374 close(fd);
1375 }
drhbbd42a62004-05-22 17:41:58 +00001376 }
1377#endif
1378 return SQLITE_OK;
1379}
1380
/*
** Sleep for a little while.  Return the amount of time slept, in
** milliseconds.
**
** When usleep() is available (HAVE_USLEEP is defined and non-zero) the
** request is honored as given.  Otherwise only sleep() can be used, so
** the request is rounded up to a whole number of seconds and that
** rounded figure is what gets returned.
*/
int sqlite3OsSleep(int ms){
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  usleep(ms*1000);
  return ms;
#else
  int nSec = (ms+999)/1000;
  sleep(nSec);
  return 1000*nSec;
#endif
}
1393
/*
** Static variables used for thread synchronization.  inMutex is true
** while some caller is between sqlite3OsEnterMutex() and
** sqlite3OsLeaveMutex(); the pthread mutex exists only in builds that
** define SQLITE_UNIX_THREADS.
*/
static int inMutex = 0;
#if defined(SQLITE_UNIX_THREADS)
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

/*
** The following pair of routines implements mutual exclusion for
** multi-threaded processes.  Only a single thread is allowed to
** execute code that is surrounded by EnterMutex() and LeaveMutex().
**
** SQLite uses only a single Mutex.  There is not much critical
** code and what little there is executes quickly and without blocking.
**
** The asserts check that the mutex is never entered twice and never
** left without having been entered.
*/
void sqlite3OsEnterMutex(){
#if defined(SQLITE_UNIX_THREADS)
  pthread_mutex_lock(&mutex);
#endif
  assert( inMutex==0 );
  inMutex = 1;
}
void sqlite3OsLeaveMutex(){
  assert( inMutex!=0 );
  inMutex = 0;
#if defined(SQLITE_UNIX_THREADS)
  pthread_mutex_unlock(&mutex);
#endif
}
1424
/*
** The following variable, if set to a non-zero value, becomes the result
** returned from sqlite3OsCurrentTime().  This is used for testing.
*/
#ifdef SQLITE_TEST
int sqlite3_current_time = 0;
#endif

/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow and
** return 0.  Return 1 if the time and date cannot be found, in which
** case *prNow is left unchanged.
**
** 2440587.5 is the Julian Day number of the Unix epoch and 86400 is
** the number of seconds in a day.
*/
int sqlite3OsCurrentTime(double *prNow){
  int rc = 0;
#ifdef NO_GETTOD
  time_t t = time(0);
  if( t==(time_t)-1 ){
    rc = 1;
  }else{
    *prNow = t/86400.0 + 2440587.5;
  }
#else
  struct timeval sNow;
  /* The timezone argument is obsolete and the result of passing a
  ** non-null pointer is unspecified, so pass a null pointer.
  */
  if( gettimeofday(&sNow, 0)!=0 ){
    rc = 1;
  }else{
    *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
  }
#endif
#ifdef SQLITE_TEST
  /* A test-supplied time overrides whatever the system clock said */
  if( sqlite3_current_time ){
    *prNow = sqlite3_current_time/86400.0 + 2440587.5;
    rc = 0;
  }
#endif
  return rc;
}
1456
drhbbd42a62004-05-22 17:41:58 +00001457#endif /* OS_UNIX */