Blame - src/os_unix.c - chromium.googlesource.com/chromium/deps/sqlite

blob: f2ac94e12684a55adeda5e5eeaa0599d1a3fec58 [file] [log] [blame]

drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1	/*
				2	** 2004 May 22
				3	**
				4	** The author disclaims copyright to this source code. In place of
				5	** a legal notice, here is a blessing:
				6	**
				7	** May you do good and not evil.
				8	** May you find forgiveness for yourself and forgive others.
				9	** May you share freely, never taking more than you give.
				10	**
				11	******************************************************************************
				12	**
				13	** This file contains code that is specific to Unix systems.
				14	*/
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	15	#include "sqliteInt.h"
drh	eb20625	2004-10-01 02:00:31 +0000	[diff] [blame]	16	#include "os.h"
				17	#if OS_UNIX /* This file is used on unix only */
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	18	/*
				19	** These #defines should enable >2GB file support on Posix if the
				20	** underlying operating system supports it. If the OS lacks
				21	** large file support, or if the OS is windows, these should be no-ops.
				22	**
				23	** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
				24	** on the compiler command line. This is necessary if you are compiling
				25	** on a recent machine (ex: RedHat 7.2) but you want your code to work
				26	** on an older machine (ex: RedHat 6.0). If you compile on RedHat 7.2
				27	** without this option, LFS is enabled. But LFS does not exist in the kernel
				28	** in RedHat 6.0, so the code won't work. Hence, for maximum binary
				29	** portability you should omit LFS.
				30	**
				31	** Similar is true for MacOS. LFS is only supported on MacOS 9 and later.
				32	*/
				33	#ifndef SQLITE_DISABLE_LFS
				34	# define _LARGE_FILE 1
				35	# ifndef _FILE_OFFSET_BITS
				36	# define _FILE_OFFSET_BITS 64
				37	# endif
				38	# define _LARGEFILE_SOURCE 1
				39	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	40
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	41	/*
				42	** standard include files.
				43	*/
				44	#include <sys/types.h>
				45	#include <sys/stat.h>
				46	#include <fcntl.h>
				47	#include <unistd.h>
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	48	#include <time.h>
drh	19e2d37	2005-08-29 23:00:03 +0000	[diff] [blame]	49	#include <sys/time.h>
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	50	#include <errno.h>
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	51
				52	/*
				53	** Macros used to determine whether or not to use threads. The
				54	** SQLITE_UNIX_THREADS macro is defined if we are synchronizing for
				55	** Posix threads and SQLITE_W32_THREADS is defined if we are
				56	** synchronizing using Win32 threads.
				57	*/
				58	#if defined(THREADSAFE) && THREADSAFE
				59	# include <pthread.h>
				60	# define SQLITE_UNIX_THREADS 1
				61	#endif
				62
				63	/*
				64	** Default permissions when creating a new file
				65	*/
				66	#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
				67	# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
				68	#endif
				69
				70
				71
				72	/*
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	73	** The unixFile structure is a subclass of OsFile specific to the unix
				74	** portability layer.
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	75	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	76	typedef struct unixFile unixFile;
				77	struct unixFile {
				78	IoMethod const *pMethod; /* Always the first entry */
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	79	struct openCnt *pOpen; /* Info about all open fd's on this inode */
				80	struct lockInfo *pLock; /* Info about locks on this inode */
				81	int h; /* The file descriptor */
				82	unsigned char locktype; /* The type of lock held on this fd */
				83	unsigned char isOpen; /* True if needs to be closed */
				84	unsigned char fullSync; /* Use F_FULLSYNC if available */
				85	int dirfd; /* File descriptor for the directory; -1 when none is open */
				86	#ifdef SQLITE_UNIX_THREADS
				87	pthread_t tid; /* The thread authorized to use this OsFile */
				88	#endif
				89	};
				90
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	91
				92	/*
				93	** Do not include any of the File I/O interface procedures if the
				94	** SQLITE_OMIT_DISKIO macro is defined (indicating that the database
				95	** will be in-memory only)
				96	*/
				97	#ifndef SQLITE_OMIT_DISKIO
				98
				99
				100	/*
				101	** Define various macros that are missing from some systems.
				102	*/
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	103	#ifndef O_LARGEFILE
				104	# define O_LARGEFILE 0
				105	#endif
				106	#ifdef SQLITE_DISABLE_LFS
				107	# undef O_LARGEFILE
				108	# define O_LARGEFILE 0
				109	#endif
				110	#ifndef O_NOFOLLOW
				111	# define O_NOFOLLOW 0
				112	#endif
				113	#ifndef O_BINARY
				114	# define O_BINARY 0
				115	#endif
				116
				117	/*
				118	** The DJGPP compiler environment looks mostly like Unix, but it
				119	** lacks the fcntl() system call. So redefine fcntl() to be something
				120	** that always succeeds. This means that locking does not occur under
danielk1977	26c5d79	2005-11-25 09:01:23 +0000	[diff] [blame]	121	** DJGPP. But it's DOS - what did you expect?
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	122	*/
				123	#ifdef __DJGPP__
				124	# define fcntl(A,B,C) 0
				125	#endif
				126
				127	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	128	** Include code that is common to all os_*.c files
				129	*/
				130	#include "os_common.h"
				131
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	132	/*
				133	** The threadid macro resolves to the thread-id or to 0. Used for
				134	** testing and debugging only.
				135	*/
				136	#ifdef SQLITE_UNIX_THREADS
				137	#define threadid pthread_self()
				138	#else
				139	#define threadid 0
				140	#endif
				141
				142	/*
				143	** Set or check the OsFile.tid field. This field is set when an OsFile
				144	** is first opened. All subsequent uses of the OsFile verify that the
				145	** same thread is operating on the OsFile. Some operating systems do
				146	** not allow locks to be overridden by other threads and that restriction
				147	** means that sqlite3* database handles cannot be moved from one thread
				148	** to another. This logic makes sure a user does not try to do that
				149	** by mistake.
				150	*/
drh	91636d5	2005-11-24 23:14:00 +0000	[diff] [blame]	151	#if defined(SQLITE_UNIX_THREADS) && !defined(SQLITE_ALLOW_XTHREAD_CONNECTIONS)
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	152	# define SET_THREADID(X) (X)->tid = pthread_self()
				153	# define CHECK_THREADID(X) (!pthread_equal((X)->tid, pthread_self()))
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	154	#else
				155	# define SET_THREADID(X)
				156	# define CHECK_THREADID(X) 0
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	157	#endif
				158
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	159	/*
				160	** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
				161	** section 6.5.2.2 lines 483 through 490 specify that when a process
				162	** sets or clears a lock, that operation overrides any prior locks set
				163	** by the same process. It does not explicitly say so, but this implies
				164	** that it overrides locks set by the same process using a different
				165	** file descriptor. Consider this test case:
				166	**
				167	** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
				168	** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
				169	**
				170	** Suppose ./file1 and ./file2 are really the same file (because
				171	** one is a hard or symbolic link to the other) then if you set
				172	** an exclusive lock on fd1, then try to get an exclusive lock
				173	** on fd2, it works. I would have expected the second lock to
				174	** fail since there was already a lock on the file due to fd1.
				175	** But not so. Since both locks came from the same process, the
				176	** second overrides the first, even though they were on different
				177	** file descriptors opened on different file names.
				178	**
				179	** Bummer. If you ask me, this is broken. Badly broken. It means
				180	** that we cannot use POSIX locks to synchronize file access among
				181	** competing threads of the same process. POSIX locks will work fine
				182	** to synchronize access for threads in separate processes, but not
				183	** threads within the same process.
				184	**
				185	** To work around the problem, SQLite has to manage file locks internally
				186	** on its own. Whenever a new database is opened, we have to find the
				187	** specific inode of the database file (the inode is determined by the
				188	** st_dev and st_ino fields of the stat structure that fstat() fills in)
				189	** and check for locks already existing on that inode. When locks are
				190	** created or removed, we have to look at our own internal record of the
				191	** locks to see if another thread has previously set a lock on that same
				192	** inode.
				193	**
				194	** The OsFile structure for POSIX is no longer just an integer file
				195	** descriptor. It is now a structure that holds the integer file
				196	** descriptor and a pointer to a structure that describes the internal
				197	** locks on the corresponding inode. There is one locking structure
				198	** per inode, so if the same inode is opened twice, both OsFile structures
				199	** point to the same locking structure. The locking structure keeps
				200	** a reference count (so we will know when to delete it) and a "cnt"
				201	** field that tells us its internal lock status. cnt==0 means the
				202	** file is unlocked. cnt==-1 means the file has an exclusive lock.
				203	** cnt>0 means there are cnt shared locks on the file.
				204	**
				205	** Any attempt to lock or unlock a file first checks the locking
				206	** structure. The fcntl() system call is only invoked to set a
				207	** POSIX lock if the internal lock structure transitions between
				208	** a locked and an unlocked state.
				209	**
				210	** 2004-Jan-11:
				211	** More recent discoveries about POSIX advisory locks. (The more
				212	** I discover, the more I realize that POSIX advisory locks are
				213	** an abomination.)
				214	**
				215	** If you close a file descriptor that points to a file that has locks,
				216	** all locks on that file that are owned by the current process are
				217	** released. To work around this problem, each OsFile structure contains
				218	** a pointer to an openCnt structure. There is one openCnt structure
				219	** per open inode, which means that multiple OsFiles can point to a single
				220	** openCnt. When an attempt is made to close an OsFile, if there are
				221	** other OsFiles open on the same inode that are holding locks, the call
				222	** to close() the file descriptor is deferred until all of the locks clear.
				223	** The openCnt structure keeps a list of file descriptors that need to
				224	** be closed and that list is walked (and cleared) when the last lock
				225	** clears.
				226	**
				227	** First, under Linux threads, because each thread has a separate
				228	** process ID, lock operations in one thread do not override locks
				229	** to the same file in other threads. Linux threads behave like
				230	** separate processes in this respect. But, if you close a file
				231	** descriptor in linux threads, all locks are cleared, even locks
				232	** on other threads and even though the other threads have different
				233	** process IDs. Linux threads is inconsistent in this respect.
				234	** (I'm beginning to think that linux threads is an abomination too.)
				235	** The consequence of this all is that the hash table for the lockInfo
				236	** structure has to include the process id as part of its key because
				237	** locks in different threads are treated as distinct. But the
				238	** openCnt structure should not include the process id in its
				239	** key because close() clears lock on all threads, not just the current
				240	** thread. Were it not for this goofiness in linux threads, we could
				241	** combine the lockInfo and openCnt structures into a single structure.
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	242	**
				243	** 2004-Jun-28:
				244	** On some versions of linux, threads can override each others locks.
				245	** On others not. Sometimes you can change the behavior on the same
				246	** system by setting the LD_ASSUME_KERNEL environment variable. The
				247	** POSIX standard is silent as to which behavior is correct, as far
				248	** as I can tell, so other versions of unix might show the same
				249	** inconsistency. There is little doubt in my mind that posix
				250	** advisory locks and linux threads are profoundly broken.
				251	**
				252	** To work around the inconsistencies, we have to test at runtime
				253	** whether or not threads can override each others locks. This test
				254	** is run once, the first time any lock is attempted. A static
				255	** variable is set to record the results of this test for future
				256	** use.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	257	*/
				258
				259	/*
				260	** An instance of the following structure serves as the key used
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	261	** to locate a particular lockInfo structure given its inode.
				262	**
				263	** If threads cannot override each others locks, then we set the
				264	** lockKey.tid field to the thread ID. If threads can override
				265	** each others locks then tid is always set to zero. tid is also
				266	** set to zero if we compile without threading support.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	267	*/
				268	struct lockKey { /* findLockInfo() zeroes the whole key before filling it in */
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	269	dev_t dev; /* Device number (st_dev as reported by fstat()) */
				270	ino_t ino; /* Inode number (st_ino as reported by fstat()) */
				271	#ifdef SQLITE_UNIX_THREADS
drh	d9cb6ac	2005-10-20 07:28:17 +0000	[diff] [blame]	272	pthread_t tid; /* Thread ID or zero if threads can override each other */
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	273	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	274	};
				275
				276	/*
				277	** An instance of the following structure is allocated for each open
				278	** inode on each thread with a different process ID. (Threads have
				279	** different process IDs on linux, but not on most other unixes.)
				280	**
				281	** A single inode can have multiple file descriptors, so each OsFile
				282	** structure contains a pointer to an instance of this object and this
				283	** object keeps a count of the number of OsFiles pointing to it.
				284	*/
				285	struct lockInfo {
				286	struct lockKey key; /* The lookup key used in lockHash */
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	287	int cnt; /* Number of SHARED locks held; 0 when first created */
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	288	int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc.; 0 when first created */
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	289	int nRef; /* Number of pointers to this structure; freed by releaseLockInfo() at 0 */
				290	};
				291
				292	/*
				293	** An instance of the following structure serves as the key used
				294	** to locate a particular openCnt structure given its inode. This
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	295	** is the same as the lockKey except that the thread ID is omitted.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	296	*/
				297	struct openKey { /* findLockInfo() zeroes the whole key before filling it in */
				298	dev_t dev; /* Device number (st_dev as reported by fstat()) */
				299	ino_t ino; /* Inode number (st_ino as reported by fstat()) */
				300	};
				301
				302	/*
				303	** An instance of the following structure is allocated for each open
				304	** inode. This structure keeps track of the number of locks on that
				305	** inode. If a close is attempted against an inode that is holding
				306	** locks, the close is deferred until all locks clear by adding the
				307	** file descriptor to be closed to the pending list.
				308	*/
				309	struct openCnt {
				310	struct openKey key; /* The lookup key used in openHash */
				311	int nRef; /* Number of pointers to this structure; freed by releaseOpenCnt() at 0 */
				312	int nLock; /* Number of outstanding locks */
				313	int nPending; /* Number of pending close() operations */
				314	int *aPending; /* Malloced space holding fd's awaiting a close() */
				315	};
				316
				317	/*
				318	** These hash tables map inodes and process IDs into lockInfo and openCnt
				319	** structures. Access to these hash tables must be protected by a mutex.
				320	*/
				321	static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 }; /* struct lockKey -> struct lockInfo */
				322	static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 }; /* struct openKey -> struct openCnt */
				323
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	324
				325	#ifdef SQLITE_UNIX_THREADS
				326	/*
				327	** This variable records whether or not threads can override each others
				328	** locks.
				329	**
				330	** 0: No. Threads cannot override each others locks.
				331	** 1: Yes. Threads can override each others locks.
				332	** -1: We don't know yet.
				333	*/
				334	static int threadsOverrideEachOthersLocks = -1; /* Assigned by testThreadLockingBehavior() */
				335
				336	/*
				337	** This structure holds information passed into individual test
				338	** threads by the testThreadLockingBehavior() routine.
				339	*/
				340	struct threadTestData {
				341	int fd; /* File to be locked */
				342	struct flock lock; /* The locking operation, passed to fcntl(F_SETLK) */
				343	int result; /* Result of the locking operation: the fcntl() return value */
				344	};
				345
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	346	#ifdef SQLITE_LOCK_TRACE
				347	/*
				348	** Print out information about all locking operations.
				349	**
				350	** This routine is used for troubleshooting locks on multithreaded
				351	** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
				352	** command-line option on the compiler. This code is normally
				353	** turned off.
				354	*/
				355	static int lockTrace(int fd, int op, struct flock *p){
				356	char *zOpName, *zType;
				357	int s;
				358	int savedErrno;
				359	if( op==F_GETLK ){
				360	zOpName = "GETLK";
				361	}else if( op==F_SETLK ){
				362	zOpName = "SETLK";
				363	}else{
				364	s = fcntl(fd, op, p); /* Any other op: forward it and log only the result */
				365	sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
				366	return s;
				367	}
				368	if( p->l_type==F_RDLCK ){
				369	zType = "RDLCK";
				370	}else if( p->l_type==F_WRLCK ){
				371	zType = "WRLCK";
				372	}else if( p->l_type==F_UNLCK ){
				373	zType = "UNLCK";
				374	}else{
				375	assert( 0 );
				376	}
				377	assert( p->l_whence==SEEK_SET );
				378	s = fcntl(fd, op, p);
				379	savedErrno = errno; /* Saved so the tracing below cannot disturb the caller's errno */
				380	sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
				381	threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
				382	(int)p->l_pid, s);
				383	if( s && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
				384	struct flock l2;
				385	l2 = *p; /* Query with a copy so the caller's request is left untouched */
				386	fcntl(fd, F_GETLK, &l2);
				387	if( l2.l_type==F_RDLCK ){
				388	zType = "RDLCK";
				389	}else if( l2.l_type==F_WRLCK ){
				390	zType = "WRLCK";
				391	}else if( l2.l_type==F_UNLCK ){
				392	zType = "UNLCK";
				393	}else{
				394	assert( 0 );
				395	}
				396	sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
				397	zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
				398	}
				399	errno = savedErrno;
				400	return s;
				401	}
				402	#define fcntl lockTrace
				403	#endif /* SQLITE_LOCK_TRACE */
				404
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	405	/*
				406	** The testThreadLockingBehavior() routine launches two separate
				407	** threads on this routine. This routine attempts to lock a file
				408	** descriptor then returns. The success or failure of that attempt
				409	** allows the testThreadLockingBehavior() procedure to determine
				410	** whether or not threads can override each others locks.
				411	*/
				412	static void *threadLockingTest(void *pArg){
				413	struct threadTestData *pData = (struct threadTestData*)pArg;
				414	pData->result = fcntl(pData->fd, F_SETLK, &pData->lock); /* Outcome is read by the launching routine */
				415	return pArg;
				416	}
				417
				418	/*
				419	** This procedure attempts to determine whether or not threads
				420	** can override each others locks then sets the
				421	** threadsOverrideEachOthersLocks variable appropriately.
				422	*/
				423	static void testThreadLockingBehavior(int fd_orig){
				424	int fd;
				425	struct threadTestData d[2];
				426	pthread_t t[2];
				427
				428	fd = dup(fd_orig); /* Probe on a duplicate, which is closed again below */
				429	if( fd<0 ) return; /* Result stays -1 (unknown), so the caller will try again later */
				430	memset(d, 0, sizeof(d));
				431	d[0].fd = fd;
				432	d[0].lock.l_type = F_RDLCK;
				433	d[0].lock.l_len = 1;
				434	d[0].lock.l_start = 0;
				435	d[0].lock.l_whence = SEEK_SET;
				436	d[1] = d[0]; /* Same byte range, but a write lock instead of a read lock */
				437	d[1].lock.l_type = F_WRLCK;
				438	pthread_create(&t[0], 0, threadLockingTest, &d[0]);
				439	pthread_create(&t[1], 0, threadLockingTest, &d[1]);
				440	pthread_join(t[0], 0);
				441	pthread_join(t[1], 0);
				442	close(fd);
				443	threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0; /* Both granted => threads override each other */
				444	}
				445	#endif /* SQLITE_UNIX_THREADS */
				446
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	447	/*
				448	** Release a lockInfo structure previously allocated by findLockInfo().
				449	*/
				450	static void releaseLockInfo(struct lockInfo *pLock){
				451	pLock->nRef--; /* Drop the caller's reference */
				452	if( pLock->nRef==0 ){ /* Last reference gone: unregister and free */
				453	sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0); /* NULL data: presumably removes the entry -- confirm in hash.c */
				454	sqliteFree(pLock);
				455	}
				456	}
				457
				458	/*
				459	** Release an openCnt structure previously allocated by findLockInfo().
				460	*/
				461	static void releaseOpenCnt(struct openCnt *pOpen){
				462	pOpen->nRef--; /* Drop the caller's reference */
				463	if( pOpen->nRef==0 ){ /* Last reference gone: unregister and free */
				464	sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0); /* NULL data: presumably removes the entry -- confirm in hash.c */
				465	sqliteFree(pOpen->aPending); /* The pending-close list is freed along with the structure */
				466	sqliteFree(pOpen);
				467	}
				468	}
				469
				470	/*
				471	** Given a file descriptor, locate lockInfo and openCnt structures that
				472	** describe that file descriptor. Create new ones if necessary. The
				473	** return values might be unset if an error occurs.
				474	**
				475	** Return the number of errors.
				476	*/
drh	38f8271	2004-06-18 17:10:16 +0000	[diff] [blame]	477	static int findLockInfo(
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	478	int fd, /* The file descriptor used in the key */
				479	struct lockInfo **ppLock, /* Return the lockInfo structure here */
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	480	struct openCnt **ppOpen /* Return the openCnt structure here */
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	481	){
				482	int rc;
				483	struct lockKey key1;
				484	struct openKey key2;
				485	struct stat statbuf;
				486	struct lockInfo *pLock;
				487	struct openCnt *pOpen;
danielk1977	441b09a	2006-01-05 13:48:29 +0000	[diff] [blame^]	488	SqliteTsd *pTsd = sqlite3Tsd();
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	489	rc = fstat(fd, &statbuf);
				490	if( rc!=0 ) return 1; /* fstat() failed: the keys cannot be built */
danielk1977	441b09a	2006-01-05 13:48:29 +0000	[diff] [blame^]	491
				492	/* Disable the sqlite3_release_memory() function */
				493	assert( !pTsd->disableReleaseMemory );
				494	pTsd->disableReleaseMemory = 1;
				495
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	496	memset(&key1, 0, sizeof(key1));
				497	key1.dev = statbuf.st_dev;
				498	key1.ino = statbuf.st_ino;
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	499	#ifdef SQLITE_UNIX_THREADS
				500	if( threadsOverrideEachOthersLocks<0 ){
				501	testThreadLockingBehavior(fd);
				502	}
				503	key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
				504	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	505	memset(&key2, 0, sizeof(key2));
				506	key2.dev = statbuf.st_dev;
				507	key2.ino = statbuf.st_ino;
				508	pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
				509	if( pLock==0 ){
				510	struct lockInfo *pOld;
				511	pLock = sqliteMallocRaw( sizeof(*pLock) );
danielk1977	441b09a	2006-01-05 13:48:29 +0000	[diff] [blame^]	512	if( pLock==0 ){
				513	rc = 1;
				514	goto exit_findlockinfo;
				515	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	516	pLock->key = key1;
				517	pLock->nRef = 1;
				518	pLock->cnt = 0;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	519	pLock->locktype = 0;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	520	pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
				521	if( pOld!=0 ){ /* NOTE(review): a non-NULL return presumably signals a failed insert -- confirm in hash.c */
				522	assert( pOld==pLock );
				523	sqliteFree(pLock);
danielk1977	441b09a	2006-01-05 13:48:29 +0000	[diff] [blame^]	524	rc = 1;
				525	goto exit_findlockinfo;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	526	}
				527	}else{
				528	pLock->nRef++;
				529	}
				530	*ppLock = pLock;
				531	pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
				532	if( pOpen==0 ){
				533	struct openCnt *pOld;
				534	pOpen = sqliteMallocRaw( sizeof(*pOpen) );
				535	if( pOpen==0 ){
				536	releaseLockInfo(pLock); /* Undo the lockInfo reference taken above */
danielk1977	441b09a	2006-01-05 13:48:29 +0000	[diff] [blame^]	537	rc = 1;
				538	goto exit_findlockinfo;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	539	}
				540	pOpen->key = key2;
				541	pOpen->nRef = 1;
				542	pOpen->nLock = 0;
				543	pOpen->nPending = 0;
				544	pOpen->aPending = 0;
				545	pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
				546	if( pOld!=0 ){
				547	assert( pOld==pOpen );
				548	sqliteFree(pOpen);
				549	releaseLockInfo(pLock); /* Undo the lockInfo reference taken above */
danielk1977	441b09a	2006-01-05 13:48:29 +0000	[diff] [blame^]	550	rc = 1;
				551	goto exit_findlockinfo;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	552	}
				553	}else{
				554	pOpen->nRef++;
				555	}
				556	*ppOpen = pOpen;
danielk1977	441b09a	2006-01-05 13:48:29 +0000	[diff] [blame^]	557
				558	exit_findlockinfo:
				559	/* Re-enable sqlite3_release_memory() */
				560	pTsd->disableReleaseMemory = 0;
				561	return rc;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	562	}
				563
				564	/*
				565	** Delete the named file
				566	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	567	static int unixDelete(const char *zFilename){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	568	unlink(zFilename); /* The unlink() result is ignored */
				569	return SQLITE_OK; /* Always reports success */
				570	}
				571
				572	/*
				573	** Return TRUE if the named file exists.
				574	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	575	static int unixFileExists(const char *zFilename){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	576	return access(zFilename, 0)==0; /* Mode 0 (F_OK on most systems): existence check only */
				577	}
				578
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	579	/* Forward declaration */
				580	static int allocateUnixFile(unixFile *pInit, OsFile **pId);
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	581
				582	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	583	** Attempt to open a file for both reading and writing. If that
				584	** fails, try opening it read-only. If the file does not exist,
				585	** try to create it.
				586	**
				587	** On success, a handle for the open file is written to *id
				588	** and *pReadonly is set to 0 if the file was opened for reading and
				589	** writing or 1 if the file was opened read-only. The function returns
				590	** SQLITE_OK.
				591	**
				592	** On failure, the function returns SQLITE_CANTOPEN and leaves
				593	** id and pReadonly unchanged.
				594	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	595	static int unixOpenReadWrite(
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	596	const char *zFilename,
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	597	OsFile **pId,
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	598	int *pReadonly
				599	){
				600	int rc;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	601	unixFile f;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	602
				603	assert( 0==*pId );
				604	f.dirfd = -1;
				605	SET_THREADID(&f);
				606	f.h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY,
drh	8e85577	2005-05-17 11:25:31 +0000	[diff] [blame]	607	SQLITE_DEFAULT_FILE_PERMISSIONS);
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	608	if( f.h<0 ){
drh	6458e39	2004-07-20 01:14:13 +0000	[diff] [blame]	609	#ifdef EISDIR
				610	if( errno==EISDIR ){ /* A directory: do not retry read-only */
				611	return SQLITE_CANTOPEN;
				612	}
				613	#endif
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	614	f.h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY); /* Read/write open failed: retry read-only */
				615	if( f.h<0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	616	return SQLITE_CANTOPEN;
				617	}
				618	*pReadonly = 1;
				619	}else{
				620	*pReadonly = 0;
				621	}
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	622	sqlite3Os.xEnterMutex();
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	623	rc = findLockInfo(f.h, &f.pLock, &f.pOpen);
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	624	sqlite3Os.xLeaveMutex();
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	625	if( rc ){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	626	close(f.h);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	627	return SQLITE_NOMEM;
				628	}
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	629	f.locktype = 0;
				630	TRACE3("OPEN %-3d %s\n", f.h, zFilename);
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	631	return allocateUnixFile(&f, pId);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	632	}
				633
				634
				635	/*
				636	** Attempt to open a new file for exclusive access by this process.
				637	** The file will be opened for both reading and writing. To avoid
				638	** a potential security problem, we do not allow the file to have
				639	** previously existed. Nor do we allow the file to be a symbolic
				640	** link.
				641	**
				642	** If delFlag is true, then make arrangements to automatically delete
				643	** the file when it is closed.
				644	**
				645	** On success, write the file handle into *id and return SQLITE_OK.
				646	**
				647	** On failure, return SQLITE_CANTOPEN.
				648	*/
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	649	static int unixOpenExclusive(const char *zFilename, OsFile **pId, int delFlag){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	650	int rc;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	651	unixFile f;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	652
				653	assert( 0==*pId );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	654	if( access(zFilename, 0)==0 ){ /* Refuse a file that already exists */
				655	return SQLITE_CANTOPEN;
				656	}
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	657	SET_THREADID(&f);
				658	f.dirfd = -1;
				659	f.h = open(zFilename,
drh	d645967	2005-08-13 17:17:01 +0000	[diff] [blame]	660	O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY,
				661	SQLITE_DEFAULT_FILE_PERMISSIONS);
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	662	if( f.h<0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	663	return SQLITE_CANTOPEN;
				664	}
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	665	sqlite3Os.xEnterMutex();
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	666	rc = findLockInfo(f.h, &f.pLock, &f.pOpen);
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	667	sqlite3Os.xLeaveMutex();
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	668	if( rc ){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	669	close(f.h);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	670	unlink(zFilename); /* Remove the file this call just created */
				671	return SQLITE_NOMEM;
				672	}
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	673	f.locktype = 0;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	674	if( delFlag ){
				675	unlink(zFilename); /* Unlinked while the descriptor stays open */
				676	}
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	677	TRACE3("OPEN-EX %-3d %s\n", f.h, zFilename);
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	678	return allocateUnixFile(&f, pId);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	679	}
				680
				681	/*
				682	** Attempt to open a new file for read-only access.
				683	**
				684	** On success, write the file handle into *id and return SQLITE_OK.
				685	**
				686	** On failure, return SQLITE_CANTOPEN.
				687	*/
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	688	static int unixOpenReadOnly(const char zFilename, OsFile *pId){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	689	int rc;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	690	unixFile f;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	691
				692	assert( 0==*pId );
				693	SET_THREADID(&f);
				694	f.dirfd = -1;
				695	f.h = open(zFilename, O_RDONLY\|O_LARGEFILE\|O_BINARY);
				696	if( f.h<0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	697	return SQLITE_CANTOPEN;
				698	}
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	699	sqlite3Os.xEnterMutex();
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	700	rc = findLockInfo(f.h, &f.pLock, &f.pOpen);
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	701	sqlite3Os.xLeaveMutex();
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	702	if( rc ){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	703	close(f.h);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	704	return SQLITE_NOMEM;
				705	}
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	706	f.locktype = 0;
				707	TRACE3("OPEN-RO %-3d %s\n", f.h, zFilename);
danielk1977	261919c	2005-12-06 12:52:59 +0000	[diff] [blame]	708
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	709	return allocateUnixFile(&f, pId);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	710	}
				711
				712	/*
				713	** Attempt to open a file descriptor for the directory that contains a
				714	** file. This file descriptor can be used to fsync() the directory
				715	** in order to make sure the creation of a new file is actually written
				716	** to disk.
				717	**
				718	** This routine is only meaningful for Unix. It is a no-op under
				719	** windows since windows does not support hard links.
				720	**
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	721	** On success, a handle for a previously open file at *id is
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	722	** updated with the new directory file descriptor and SQLITE_OK is
				723	** returned.
				724	**
				725	** On failure, the function returns SQLITE_CANTOPEN and leaves
				726	** *id unchanged.
				727	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	728	static int unixOpenDirectory(
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	729	OsFile *id,
				730	const char *zDirname
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	731	){
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	732	unixFile pFile = (unixFile)id;
				733	if( pFile==0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	734	/* Do not open the directory if the corresponding file is not already
				735	** open. */
				736	return SQLITE_CANTOPEN;
				737	}
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	738	SET_THREADID(pFile);
				739	assert( pFile->dirfd<0 );
				740	pFile->dirfd = open(zDirname, O_RDONLY\|O_BINARY, 0);
				741	if( pFile->dirfd<0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	742	return SQLITE_CANTOPEN;
				743	}
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	744	TRACE3("OPENDIR %-3d %s\n", pFile->dirfd, zDirname);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	745	return SQLITE_OK;
				746	}
				747
				748	/*
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	749	** If the following global variable points to a string which is the
				750	** name of a directory, then that directory will be used to store
				751	** temporary files.
				752	*/
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	753	char *sqlite3_temp_directory = 0;
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	754
				755	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	756	** Create a temporary file name in zBuf. zBuf must be big enough to
				757	** hold at least SQLITE_TEMPNAME_SIZE characters.
				758	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	759	static int unixTempFileName(char *zBuf){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	760	static const char *azDirs[] = {
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	761	0,
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	762	"/var/tmp",
				763	"/usr/tmp",
				764	"/tmp",
				765	".",
				766	};
drh	5719628	2004-10-06 15:41:16 +0000	[diff] [blame]	767	static const unsigned char zChars[] =
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	768	"abcdefghijklmnopqrstuvwxyz"
				769	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				770	"0123456789";
				771	int i, j;
				772	struct stat buf;
				773	const char *zDir = ".";
drh	effd02b	2004-08-29 23:42:13 +0000	[diff] [blame]	774	azDirs[0] = sqlite3_temp_directory;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	775	for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	776	if( azDirs[i]==0 ) continue;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	777	if( stat(azDirs[i], &buf) ) continue;
				778	if( !S_ISDIR(buf.st_mode) ) continue;
				779	if( access(azDirs[i], 07) ) continue;
				780	zDir = azDirs[i];
				781	break;
				782	}
				783	do{
				784	sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
				785	j = strlen(zBuf);
				786	sqlite3Randomness(15, &zBuf[j]);
				787	for(i=0; i<15; i++, j++){
				788	zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
				789	}
				790	zBuf[j] = 0;
				791	}while( access(zBuf,0)==0 );
				792	return SQLITE_OK;
				793	}
				794
				795	/*
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	796	** Check that a given pathname is a directory and is writable
				797	**
				798	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	799	static int unixIsDirWritable(char *zBuf){
				800	#ifndef SQLITE_OMIT_PAGER_PRAGMAS
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	801	struct stat buf;
				802	if( zBuf==0 ) return 0;
drh	268283b	2005-01-08 15:44:25 +0000	[diff] [blame]	803	if( zBuf[0]==0 ) return 0;
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	804	if( stat(zBuf, &buf) ) return 0;
				805	if( !S_ISDIR(buf.st_mode) ) return 0;
				806	if( access(zBuf, 07) ) return 0;
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	807	#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	808	return 1;
				809	}
				810
				811	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	812	** Read data from a file into a buffer. Return SQLITE_OK if all
				813	** bytes were read successfully and SQLITE_IOERR if anything goes
				814	** wrong.
				815	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	816	static int unixRead(OsFile id, void pBuf, int amt){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	817	int got;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	818	assert( id );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	819	SimulateIOError(SQLITE_IOERR);
				820	TIMER_START;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	821	got = read(((unixFile*)id)->h, pBuf, amt);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	822	TIMER_END;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	823	TRACE5("READ %-3d %5d %7d %d\n", ((unixFile*)id)->h, got,
				824	last_page, TIMER_ELAPSED);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	825	SEEK(0);
				826	/* if( got<0 ) got = 0; */
				827	if( got==amt ){
				828	return SQLITE_OK;
				829	}else{
				830	return SQLITE_IOERR;
				831	}
				832	}
				833
				834	/*
				835	** Write data from a buffer into a file. Return SQLITE_OK on success
				836	** or some other error code on failure.
				837	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	838	static int unixWrite(OsFile id, const void pBuf, int amt){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	839	int wrote = 0;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	840	assert( id );
drh	4c7f941	2005-02-03 00:29:47 +0000	[diff] [blame]	841	assert( amt>0 );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	842	SimulateIOError(SQLITE_IOERR);
drh	047d483	2004-10-01 14:38:02 +0000	[diff] [blame]	843	SimulateDiskfullError;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	844	TIMER_START;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	845	while( amt>0 && (wrote = write(((unixFile*)id)->h, pBuf, amt))>0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	846	amt -= wrote;
				847	pBuf = &((char*)pBuf)[wrote];
				848	}
				849	TIMER_END;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	850	TRACE5("WRITE %-3d %5d %7d %d\n", ((unixFile*)id)->h, wrote,
				851	last_page, TIMER_ELAPSED);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	852	SEEK(0);
				853	if( amt>0 ){
				854	return SQLITE_FULL;
				855	}
				856	return SQLITE_OK;
				857	}
				858
				859	/*
				860	** Move the read/write pointer in a file.
				861	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	862	static int unixSeek(OsFile *id, i64 offset){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	863	assert( id );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	864	SEEK(offset/1024 + 1);
drh	b4746b9	2005-09-09 01:32:06 +0000	[diff] [blame]	865	#ifdef SQLITE_TEST
				866	if( offset ) SimulateDiskfullError
				867	#endif
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	868	lseek(((unixFile*)id)->h, offset, SEEK_SET);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	869	return SQLITE_OK;
				870	}
				871
#ifdef SQLITE_TEST
/*
** Count the number of fullsyncs and normal syncs.  This is used to test
** that syncs and fullsyncs are occurring at the right times.
*/
int sqlite3_sync_count = 0;
int sqlite3_fullsync_count = 0;
#endif

/*
** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
** Otherwise use fsync() in its place.
*/
#ifndef HAVE_FDATASYNC
# define fdatasync fsync
#endif


/*
** The fsync() system call does not work as advertised on many
** unix systems.  The following procedure is an attempt to make
** it work better.
**
** fd is the descriptor to flush.  Where F_FULLFSYNC is available and
** fullSync is non-zero, fcntl(F_FULLFSYNC) is tried first, with a plain
** fsync() as the fallback; dataOnly is not consulted in that build.
** Where F_FULLFSYNC is not available, dataOnly selects fdatasync()
** over fsync().
**
** The return value is whatever the last system call returned, so zero
** means success.
**
** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful
** for testing when we want to run through the test suite quickly.
** You are strongly advised not to deploy with SQLITE_NO_SYNC
** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
** or power failure will likely corrupt the database file.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

  /* Record the number of times that we do a normal fsync() and
  ** FULLSYNC.  This is used during testing to verify that this procedure
  ** gets called with the correct arguments.
  */
#ifdef SQLITE_TEST
  if( fullSync ) sqlite3_fullsync_count++;
  sqlite3_sync_count++;
#endif

  /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
  ** no-op
  */
#ifdef SQLITE_NO_SYNC
  rc = SQLITE_OK;
#else

#ifdef F_FULLFSYNC
  /* rc==1 stands for "full sync not attempted", which routes the call
  ** to the ordinary fsync() just like a failed F_FULLFSYNC does. */
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
  if( rc ) rc = fsync(fd);

#else /* if !defined(F_FULLFSYNC) */
  rc = dataOnly ? fdatasync(fd) : fsync(fd);
#endif /* defined(F_FULLFSYNC) */
#endif /* defined(SQLITE_NO_SYNC) */

  return rc;
}
				940
				941	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	942	** Make sure all writes to a particular file are committed to disk.
				943	**
drh	eb796a7	2005-09-08 12:38:41 +0000	[diff] [blame]	944	** If dataOnly==0 then both the file itself and its metadata (file
				945	** size, access time, etc) are synced. If dataOnly!=0 then only the
				946	** file data is synced.
				947	**
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	948	** Under Unix, also make sure that the directory entry for the file
				949	** has been created by fsync-ing the directory that contains the file.
				950	** If we do not do this and we encounter a power failure, the directory
				951	** entry for the journal might not exist after we reboot. The next
				952	** SQLite to access the file will not know that the journal exists (because
				953	** the directory entry for the journal was never created) and the transaction
				954	** will not roll back - possibly leading to database corruption.
				955	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	956	static int unixSync(OsFile *id, int dataOnly){
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	957	unixFile pFile = (unixFile)id;
				958	assert( pFile );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	959	SimulateIOError(SQLITE_IOERR);
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	960	TRACE2("SYNC %-3d\n", pFile->h);
				961	if( full_fsync(pFile->h, pFile->fullSync, dataOnly) ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	962	return SQLITE_IOERR;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	963	}
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	964	if( pFile->dirfd>=0 ){
				965	TRACE2("DIRSYNC %-3d\n", pFile->dirfd);
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	966	#ifndef SQLITE_DISABLE_DIRSYNC
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	967	if( full_fsync(pFile->dirfd, pFile->fullSync, 0) ){
danielk1977	0964b23	2005-11-25 08:47:57 +0000	[diff] [blame]	968	return SQLITE_IOERR;
				969	}
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	970	#endif
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	971	close(pFile->dirfd); /* Only need to sync once, so close the directory */
				972	pFile->dirfd = -1; /* when we are done. */
drh	a285422	2004-06-17 19:04:17 +0000	[diff] [blame]	973	}
drh	a285422	2004-06-17 19:04:17 +0000	[diff] [blame]	974	return SQLITE_OK;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	975	}
				976
				977	/*
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	978	** Sync the directory zDirname. This is a no-op on operating systems other
				979	** than UNIX.
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	980	**
				981	** This is used to make sure the master journal file has truely been deleted
				982	** before making changes to individual journals on a multi-database commit.
drh	f30cc94	2005-03-11 17:52:34 +0000	[diff] [blame]	983	** The F_FULLFSYNC option is not needed here.
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	984	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	985	static int unixSyncDirectory(const char *zDirname){
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	986	#ifdef SQLITE_DISABLE_DIRSYNC
				987	return SQLITE_OK;
				988	#else
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	989	int fd;
				990	int r;
danielk1977	369f27e	2004-06-15 11:40:04 +0000	[diff] [blame]	991	SimulateIOError(SQLITE_IOERR);
drh	8e85577	2005-05-17 11:25:31 +0000	[diff] [blame]	992	fd = open(zDirname, O_RDONLY\|O_BINARY, 0);
danielk1977	369f27e	2004-06-15 11:40:04 +0000	[diff] [blame]	993	TRACE3("DIRSYNC %-3d (%s)\n", fd, zDirname);
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	994	if( fd<0 ){
				995	return SQLITE_CANTOPEN;
				996	}
				997	r = fsync(fd);
				998	close(fd);
				999	return ((r==0)?SQLITE_OK:SQLITE_IOERR);
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	1000	#endif
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	1001	}
				1002
				1003	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1004	** Truncate an open file to a specified size
				1005	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1006	static int unixTruncate(OsFile *id, i64 nByte){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1007	assert( id );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1008	SimulateIOError(SQLITE_IOERR);
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1009	return ftruncate(((unixFile*)id)->h, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1010	}
				1011
				1012	/*
				1013	** Determine the current size of a file in bytes
				1014	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1015	static int unixFileSize(OsFile id, i64 pSize){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1016	struct stat buf;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1017	assert( id );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1018	SimulateIOError(SQLITE_IOERR);
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1019	if( fstat(((unixFile*)id)->h, &buf)!=0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1020	return SQLITE_IOERR;
				1021	}
				1022	*pSize = buf.st_size;
				1023	return SQLITE_OK;
				1024	}
				1025
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1026	/*
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1027	** This routine checks if there is a RESERVED lock held on the specified
				1028	** file by this or any other process. If such a lock is held, return
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1029	** non-zero. If the file is unlocked or holds only SHARED locks, then
				1030	** return zero.
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1031	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1032	static int unixCheckReservedLock(OsFile *id){
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1033	int r = 0;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1034	unixFile pFile = (unixFile)id;
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1035
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1036	assert( pFile );
				1037	if( CHECK_THREADID(pFile) ) return SQLITE_MISUSE;
				1038	sqlite3Os.xEnterMutex(); /* Because pFile->pLock is shared across threads */
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1039
				1040	/* Check if a thread in this process holds such a lock */
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1041	if( pFile->pLock->locktype>SHARED_LOCK ){
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1042	r = 1;
				1043	}
				1044
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1045	/* Otherwise see if some other process holds it.
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1046	*/
				1047	if( !r ){
				1048	struct flock lock;
				1049	lock.l_whence = SEEK_SET;
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1050	lock.l_start = RESERVED_BYTE;
				1051	lock.l_len = 1;
				1052	lock.l_type = F_WRLCK;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1053	fcntl(pFile->h, F_GETLK, &lock);
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1054	if( lock.l_type!=F_UNLCK ){
				1055	r = 1;
				1056	}
				1057	}
				1058
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1059	sqlite3Os.xLeaveMutex();
				1060	TRACE3("TEST WR-LOCK %d %d\n", pFile->h, r);
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1061
				1062	return r;
				1063	}
				1064
#ifdef SQLITE_DEBUG
/*
** Helper function for printing out trace information from debugging
** binaries.  This returns the string representation of the supplied
** integer lock-type, or "ERROR" if the value is not one of the five
** known lock levels.
*/
static const char *locktypeName(int locktype){
  const char *zName = "ERROR";
  switch( locktype ){
    case NO_LOCK:        zName = "NONE";       break;
    case SHARED_LOCK:    zName = "SHARED";     break;
    case RESERVED_LOCK:  zName = "RESERVED";   break;
    case PENDING_LOCK:   zName = "PENDING";    break;
    case EXCLUSIVE_LOCK: zName = "EXCLUSIVE";  break;
  }
  return zName;
}
#endif
				1082
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1083	/*
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1084	** Lock the file with the lock specified by parameter locktype - one
				1085	** of the following:
				1086	**
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1087	** (1) SHARED_LOCK
				1088	** (2) RESERVED_LOCK
				1089	** (3) PENDING_LOCK
				1090	** (4) EXCLUSIVE_LOCK
				1091	**
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1092	** Sometimes when requesting one lock state, additional lock states
				1093	** are inserted in between. The locking might fail on one of the later
				1094	** transitions leaving the lock state different from what it started but
				1095	** still short of its goal. The following chart shows the allowed
				1096	** transitions and the inserted intermediate states:
				1097	**
				1098	** UNLOCKED -> SHARED
				1099	** SHARED -> RESERVED
				1100	** SHARED -> (PENDING) -> EXCLUSIVE
				1101	** RESERVED -> (PENDING) -> EXCLUSIVE
				1102	** PENDING -> EXCLUSIVE
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1103	**
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1104	** This routine will only increase a lock. Use the sqlite3OsUnlock()
				1105	** routine to lower a locking level.
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1106	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1107	static int unixLock(OsFile *id, int locktype){
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1108	/* The following describes the implementation of the various locks and
				1109	** lock transitions in terms of the POSIX advisory shared and exclusive
				1110	** lock primitives (called read-locks and write-locks below, to avoid
				1111	** confusion with SQLite lock names). The algorithms are complicated
				1112	** slightly in order to be compatible with windows systems simultaneously
				1113	** accessing the same database file, in case that is ever required.
				1114	**
				1115	** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
				1116	** byte', each single bytes at well known offsets, and the 'shared byte
				1117	** range', a range of 510 bytes at a well known offset.
				1118	**
				1119	** To obtain a SHARED lock, a read-lock is obtained on the 'pending
				1120	** byte'. If this is successful, a random byte from the 'shared byte
				1121	** range' is read-locked and the lock on the 'pending byte' released.
				1122	**
danielk1977	90ba3bd	2004-06-25 08:32:25 +0000	[diff] [blame]	1123	** A process may only obtain a RESERVED lock after it has a SHARED lock.
				1124	** A RESERVED lock is implemented by grabbing a write-lock on the
				1125	** 'reserved byte'.
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1126	**
				1127	** A process may only obtain a PENDING lock after it has obtained a
danielk1977	90ba3bd	2004-06-25 08:32:25 +0000	[diff] [blame]	1128	** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
				1129	** on the 'pending byte'. This ensures that no new SHARED locks can be
				1130	** obtained, but existing SHARED locks are allowed to persist. A process
				1131	** does not have to obtain a RESERVED lock on the way to a PENDING lock.
				1132	** This property is used by the algorithm for rolling back a journal file
				1133	** after a crash.
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1134	**
danielk1977	90ba3bd	2004-06-25 08:32:25 +0000	[diff] [blame]	1135	** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
				1136	** implemented by obtaining a write-lock on the entire 'shared byte
				1137	** range'. Since all other locks require a read-lock on one of the bytes
				1138	** within this range, this ensures that no other locks are held on the
				1139	** database.
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1140	**
				1141	** The reason a single byte cannot be used instead of the 'shared byte
				1142	** range' is that some versions of windows do not support read-locks. By
				1143	** locking a random byte from a range, concurrent SHARED locks may exist
				1144	** even if the locking primitive used is always a write-lock.
				1145	*/
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1146	int rc = SQLITE_OK;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1147	unixFile pFile = (unixFile)id;
				1148	struct lockInfo *pLock = pFile->pLock;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1149	struct flock lock;
				1150	int s;
				1151
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1152	assert( pFile );
				1153	TRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h,
				1154	locktypeName(locktype), locktypeName(pFile->locktype),
				1155	locktypeName(pLock->locktype), pLock->cnt , getpid());
				1156	if( CHECK_THREADID(pFile) ) return SQLITE_MISUSE;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1157
				1158	/* If there is already a lock of this type or more restrictive on the
				1159	** OsFile, do nothing. Don't use the end_lock: exit path, as
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1160	** sqlite3Os.xEnterMutex() hasn't been called yet.
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1161	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1162	if( pFile->locktype>=locktype ){
				1163	TRACE3("LOCK %d %s ok (already held)\n", pFile->h,
				1164	locktypeName(locktype));
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1165	return SQLITE_OK;
				1166	}
				1167
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1168	/* Make sure the locking sequence is correct
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1169	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1170	assert( pFile->locktype!=NO_LOCK \|\| locktype==SHARED_LOCK );
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1171	assert( locktype!=PENDING_LOCK );
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1172	assert( locktype!=RESERVED_LOCK \|\| pFile->locktype==SHARED_LOCK );
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1173
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1174	/* This mutex is needed because pFile->pLock is shared across threads
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1175	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1176	sqlite3Os.xEnterMutex();
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1177
				1178	/* If some thread using this PID has a lock via a different OsFile*
				1179	** handle that precludes the requested lock, return BUSY.
				1180	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1181	if( (pFile->locktype!=pLock->locktype &&
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1182	(pLock->locktype>=PENDING_LOCK \|\| locktype>SHARED_LOCK))
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1183	){
				1184	rc = SQLITE_BUSY;
				1185	goto end_lock;
				1186	}
				1187
				1188	/* If a SHARED lock is requested, and some thread using this PID already
				1189	** has a SHARED or RESERVED lock, then increment reference counts and
				1190	** return SQLITE_OK.
				1191	*/
				1192	if( locktype==SHARED_LOCK &&
				1193	(pLock->locktype==SHARED_LOCK \|\| pLock->locktype==RESERVED_LOCK) ){
				1194	assert( locktype==SHARED_LOCK );
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1195	assert( pFile->locktype==0 );
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1196	assert( pLock->cnt>0 );
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1197	pFile->locktype = SHARED_LOCK;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1198	pLock->cnt++;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1199	pFile->pOpen->nLock++;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1200	goto end_lock;
				1201	}
				1202
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1203	lock.l_len = 1L;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1204
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1205	lock.l_whence = SEEK_SET;
				1206
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1207	/* A PENDING lock is needed before acquiring a SHARED lock and before
				1208	** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
				1209	** be released.
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1210	*/
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1211	if( locktype==SHARED_LOCK
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1212	\|\| (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1213	){
danielk1977	489468c	2004-06-28 08:25:47 +0000	[diff] [blame]	1214	lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1215	lock.l_start = PENDING_BYTE;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1216	s = fcntl(pFile->h, F_SETLK, &lock);
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1217	if( s ){
				1218	rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
				1219	goto end_lock;
				1220	}
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1221	}
				1222
				1223
				1224	/* If control gets to this point, then actually go ahead and make
				1225	** operating system calls for the specified lock.
				1226	*/
				1227	if( locktype==SHARED_LOCK ){
				1228	assert( pLock->cnt==0 );
				1229	assert( pLock->locktype==0 );
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1230
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1231	/* Now get the read-lock */
				1232	lock.l_start = SHARED_FIRST;
				1233	lock.l_len = SHARED_SIZE;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1234	s = fcntl(pFile->h, F_SETLK, &lock);
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1235
				1236	/* Drop the temporary PENDING lock */
				1237	lock.l_start = PENDING_BYTE;
				1238	lock.l_len = 1L;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1239	lock.l_type = F_UNLCK;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1240	if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1241	rc = SQLITE_IOERR; /* This should never happen */
				1242	goto end_lock;
				1243	}
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1244	if( s ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1245	rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
				1246	}else{
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1247	pFile->locktype = SHARED_LOCK;
				1248	pFile->pOpen->nLock++;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1249	pLock->cnt = 1;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1250	}
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1251	}else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
				1252	/* We are trying for an exclusive lock but another thread in this
				1253	** same process is still holding a shared lock. */
				1254	rc = SQLITE_BUSY;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1255	}else{
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1256	/* The request was for a RESERVED or EXCLUSIVE lock. It is
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1257	** assumed that there is a SHARED or greater lock on the file
				1258	** already.
				1259	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1260	assert( 0!=pFile->locktype );
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1261	lock.l_type = F_WRLCK;
				1262	switch( locktype ){
				1263	case RESERVED_LOCK:
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1264	lock.l_start = RESERVED_BYTE;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1265	break;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1266	case EXCLUSIVE_LOCK:
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1267	lock.l_start = SHARED_FIRST;
				1268	lock.l_len = SHARED_SIZE;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1269	break;
				1270	default:
				1271	assert(0);
				1272	}
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1273	s = fcntl(pFile->h, F_SETLK, &lock);
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1274	if( s ){
				1275	rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
				1276	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1277	}
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1278
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1279	if( rc==SQLITE_OK ){
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1280	pFile->locktype = locktype;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1281	pLock->locktype = locktype;
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1282	}else if( locktype==EXCLUSIVE_LOCK ){
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1283	pFile->locktype = PENDING_LOCK;
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1284	pLock->locktype = PENDING_LOCK;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1285	}
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1286
				1287	end_lock:
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1288	sqlite3Os.xLeaveMutex();
				1289	TRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
danielk1977	2b44485	2004-06-29 07:45:33 +0000	[diff] [blame]	1290	rc==SQLITE_OK ? "ok" : "failed");
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1291	return rc;
				1292	}
				1293
				1294	/*
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1295	** Lower the locking level on file descriptor pFile to locktype. locktype
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1296	** must be either NO_LOCK or SHARED_LOCK.
				1297	**
				1298	** If the locking level of the file descriptor is already at or below
				1299	** the requested locking level, this routine is a no-op.
				1300	**
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1301	** It is not possible for this routine to fail if the second argument
				1302	** is NO_LOCK. If the second argument is SHARED_LOCK, this routine
				1303	** might return SQLITE_IOERR instead of SQLITE_OK.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1304	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1305	static int unixUnlock(OsFile *id, int locktype){
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1306	struct lockInfo *pLock;
				1307	struct flock lock;
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1308	int rc = SQLITE_OK;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1309	unixFile pFile = (unixFile)id;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1310
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1311	assert( pFile );
				1312	TRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
				1313	pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
				1314	if( CHECK_THREADID(pFile) ) return SQLITE_MISUSE;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1315
				1316	assert( locktype<=SHARED_LOCK );
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1317	if( pFile->locktype<=locktype ){
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1318	return SQLITE_OK;
				1319	}
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1320	sqlite3Os.xEnterMutex();
				1321	pLock = pFile->pLock;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1322	assert( pLock->cnt!=0 );
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1323	if( pFile->locktype>SHARED_LOCK ){
				1324	assert( pLock->locktype==pFile->locktype );
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1325	if( locktype==SHARED_LOCK ){
				1326	lock.l_type = F_RDLCK;
				1327	lock.l_whence = SEEK_SET;
				1328	lock.l_start = SHARED_FIRST;
				1329	lock.l_len = SHARED_SIZE;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1330	if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1331	/* This should never happen */
				1332	rc = SQLITE_IOERR;
				1333	}
				1334	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1335	lock.l_type = F_UNLCK;
				1336	lock.l_whence = SEEK_SET;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1337	lock.l_start = PENDING_BYTE;
				1338	lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1339	if( fcntl(pFile->h, F_SETLK, &lock)==0 ){
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1340	pLock->locktype = SHARED_LOCK;
				1341	}else{
				1342	rc = SQLITE_IOERR; /* This should never happen */
				1343	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1344	}
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1345	if( locktype==NO_LOCK ){
				1346	struct openCnt *pOpen;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1347
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1348	/* Decrement the shared lock counter. Release the lock using an
				1349	** OS call only when all threads in this same process have released
				1350	** the lock.
				1351	*/
				1352	pLock->cnt--;
				1353	if( pLock->cnt==0 ){
				1354	lock.l_type = F_UNLCK;
				1355	lock.l_whence = SEEK_SET;
				1356	lock.l_start = lock.l_len = 0L;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1357	if( fcntl(pFile->h, F_SETLK, &lock)==0 ){
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1358	pLock->locktype = NO_LOCK;
				1359	}else{
				1360	rc = SQLITE_IOERR; /* This should never happen */
				1361	}
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1362	}
				1363
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1364	/* Decrement the count of locks against this same file. When the
				1365	** count reaches zero, close any other file descriptors whose close
				1366	** was deferred because of outstanding locks.
				1367	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1368	pOpen = pFile->pOpen;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1369	pOpen->nLock--;
				1370	assert( pOpen->nLock>=0 );
				1371	if( pOpen->nLock==0 && pOpen->nPending>0 ){
				1372	int i;
				1373	for(i=0; i<pOpen->nPending; i++){
				1374	close(pOpen->aPending[i]);
				1375	}
				1376	sqliteFree(pOpen->aPending);
				1377	pOpen->nPending = 0;
				1378	pOpen->aPending = 0;
				1379	}
				1380	}
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1381	sqlite3Os.xLeaveMutex();
				1382	pFile->locktype = locktype;
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1383	return rc;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1384	}
				1385
				1386	/*
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1387	** Close a file.
				1388	*/
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1389	static int unixClose(OsFile **pId){
danielk1977	441b09a	2006-01-05 13:48:29 +0000	[diff] [blame^]	1390	SqliteTsd *pTsd = sqlite3Tsd();
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1391	unixFile id = (unixFile)*pId;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1392	if( !id ) return SQLITE_OK;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1393	if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1394	unixUnlock(*pId, NO_LOCK);
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1395	if( id->dirfd>=0 ) close(id->dirfd);
				1396	id->dirfd = -1;
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1397	sqlite3Os.xEnterMutex();
danielk1977	441b09a	2006-01-05 13:48:29 +0000	[diff] [blame^]	1398
				1399	/* Disable the sqlite3_release_memory() function */
				1400	assert( !pTsd->disableReleaseMemory );
				1401	pTsd->disableReleaseMemory = 1;
				1402
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1403	if( id->pOpen->nLock ){
				1404	/* If there are outstanding locks, do not actually close the file just
				1405	** yet because that would clear those locks. Instead, add the file
				1406	** descriptor to pOpen->aPending. It will be automatically closed when
				1407	** the last lock is cleared.
				1408	*/
				1409	int *aNew;
				1410	struct openCnt *pOpen = id->pOpen;
drh	ad81e87	2005-08-21 21:45:01 +0000	[diff] [blame]	1411	aNew = sqliteRealloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1412	if( aNew==0 ){
				1413	/* If a malloc fails, just leak the file descriptor */
				1414	}else{
				1415	pOpen->aPending = aNew;
drh	ad81e87	2005-08-21 21:45:01 +0000	[diff] [blame]	1416	pOpen->aPending[pOpen->nPending] = id->h;
				1417	pOpen->nPending++;
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1418	}
				1419	}else{
				1420	/* There are no outstanding locks so we can close the file immediately */
				1421	close(id->h);
				1422	}
				1423	releaseLockInfo(id->pLock);
				1424	releaseOpenCnt(id->pOpen);
danielk1977	441b09a	2006-01-05 13:48:29 +0000	[diff] [blame^]	1425
				1426	/* Disable the sqlite3_release_memory() function */
				1427	pTsd->disableReleaseMemory = 0;
				1428
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1429	sqlite3Os.xLeaveMutex();
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1430	id->isOpen = 0;
				1431	TRACE2("CLOSE %-3d\n", id->h);
				1432	OpenCounter(-1);
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1433	sqliteFree(id);
				1434	*pId = 0;
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1435	return SQLITE_OK;
				1436	}
				1437
				1438	/*
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1439	** Turn a relative pathname into a full pathname. Return a pointer
				1440	** to the full pathname stored in space obtained from sqliteMalloc().
				1441	** The calling function is responsible for freeing this space once it
				1442	** is no longer needed.
				1443	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1444	static char unixFullPathname(const char zRelative){
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1445	char *zFull = 0;
				1446	if( zRelative[0]=='/' ){
				1447	sqlite3SetString(&zFull, zRelative, (char*)0);
				1448	}else{
drh	79158e1	2005-09-06 21:40:45 +0000	[diff] [blame]	1449	char *zBuf = sqliteMalloc(5000);
				1450	if( zBuf==0 ){
				1451	return 0;
				1452	}
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1453	zBuf[0] = 0;
drh	79158e1	2005-09-06 21:40:45 +0000	[diff] [blame]	1454	sqlite3SetString(&zFull, getcwd(zBuf, 5000), "/", zRelative,
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1455	(char*)0);
drh	79158e1	2005-09-06 21:40:45 +0000	[diff] [blame]	1456	sqliteFree(zBuf);
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1457	}
				1458	return zFull;
				1459	}
				1460
drh	1883921	2005-11-26 03:43:23 +0000	[diff] [blame]	1461	/*
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1462	** Change the value of the fullsync flag in the given file descriptor.
drh	1883921	2005-11-26 03:43:23 +0000	[diff] [blame]	1463	*/
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1464	static void unixSetFullSync(OsFile *id, int v){
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1465	((unixFile*)id)->fullSync = v;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1466	}
				1467
				1468	/*
				1469	** Return the underlying file handle for an OsFile
				1470	*/
				1471	static int unixFileHandle(OsFile *id){
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1472	return ((unixFile*)id)->h;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1473	}
				1474
				1475	/*
				1476	** Return an integer that indices the type of lock currently held
				1477	** by this handle. (Used for testing and analysis only.)
				1478	*/
				1479	static int unixLockState(OsFile *id){
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1480	return ((unixFile*)id)->locktype;
drh	1883921	2005-11-26 03:43:23 +0000	[diff] [blame]	1481	}
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1482
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1483	/*
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1484	** This vector defines all the methods that can operate on an OsFile
				1485	** for unix.
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1486	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1487	static const IoMethod sqlite3UnixIoMethod = {
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1488	unixClose,
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1489	unixOpenDirectory,
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1490	unixRead,
				1491	unixWrite,
				1492	unixSeek,
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1493	unixTruncate,
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1494	unixSync,
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1495	unixSetFullSync,
				1496	unixFileHandle,
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1497	unixFileSize,
				1498	unixLock,
				1499	unixUnlock,
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame]	1500	unixLockState,
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1501	unixCheckReservedLock,
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1502	};
				1503
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1504	/*
				1505	** Allocate memory for a unixFile. Initialize the new unixFile
				1506	** to the value given in pInit and return a pointer to the new
				1507	** OsFile. If we run out of memory, close the file and return NULL.
				1508	*/
				1509	static int allocateUnixFile(unixFile pInit, OsFile *pId){
				1510	unixFile *pNew;
				1511	pNew = sqliteMalloc( sizeof(unixFile) );
				1512	if( pNew==0 ){
				1513	close(pInit->h);
danielk1977	2e588c7	2005-12-09 14:25:08 +0000	[diff] [blame]	1514	releaseLockInfo(pInit->pLock);
				1515	releaseOpenCnt(pInit->pOpen);
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1516	*pId = 0;
				1517	return SQLITE_NOMEM;
				1518	}else{
				1519	pNew = pInit;
				1520	pNew->pMethod = &sqlite3UnixIoMethod;
				1521	pId = (OsFile)pNew;
				1522	OpenCounter(+1);
				1523	return SQLITE_OK;
				1524	}
				1525	}
				1526
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1527
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1528	#endif /* SQLITE_OMIT_DISKIO */
/***************************************************************************
** Everything above deals with file I/O.  Everything that follows deals
** with other miscellaneous aspects of the operating system interface.
****************************************************************************/
				1533
				1534
				1535	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1536	** Get information to seed the random number generator. The seed
				1537	** is written into the buffer zBuf[256]. The calling function must
				1538	** supply a sufficiently large buffer.
				1539	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1540	static int unixRandomSeed(char *zBuf){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1541	/* We have to initialize zBuf to prevent valgrind from reporting
				1542	** errors. The reports issued by valgrind are incorrect - we would
				1543	** prefer that the randomness be increased by making use of the
				1544	** uninitialized space in zBuf - but valgrind errors tend to worry
				1545	** some users. Rather than argue, it seems easier just to initialize
				1546	** the whole array and silence valgrind, even if that means less randomness
				1547	** in the random seed.
				1548	**
				1549	** When testing, initializing zBuf[] to zero is all we do. That means
				1550	** that we always use the same random number sequence.* This makes the
				1551	** tests repeatable.
				1552	*/
				1553	memset(zBuf, 0, 256);
				1554	#if !defined(SQLITE_TEST)
				1555	{
drh	842b864	2005-01-21 17:53:17 +0000	[diff] [blame]	1556	int pid, fd;
				1557	fd = open("/dev/urandom", O_RDONLY);
				1558	if( fd<0 ){
				1559	time((time_t*)zBuf);
				1560	pid = getpid();
				1561	memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
				1562	}else{
				1563	read(fd, zBuf, 256);
				1564	close(fd);
				1565	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1566	}
				1567	#endif
				1568	return SQLITE_OK;
				1569	}
				1570
				1571	/*
				1572	** Sleep for a little while. Return the amount of time slept.
				1573	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1574	static int unixSleep(int ms){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1575	#if defined(HAVE_USLEEP) && HAVE_USLEEP
				1576	usleep(ms*1000);
				1577	return ms;
				1578	#else
				1579	sleep((ms+999)/1000);
				1580	return 1000*((ms+999)/1000);
				1581	#endif
				1582	}
				1583
				1584	/*
				1585	** Static variables used for thread synchronization
				1586	*/
				1587	static int inMutex = 0;
drh	7906975	2004-05-22 21:30:40 +0000	[diff] [blame]	1588	#ifdef SQLITE_UNIX_THREADS
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1589	static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
drh	7906975	2004-05-22 21:30:40 +0000	[diff] [blame]	1590	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1591
				1592	/*
				1593	** The following pair of routine implement mutual exclusion for
				1594	** multi-threaded processes. Only a single thread is allowed to
				1595	** executed code that is surrounded by EnterMutex() and LeaveMutex().
				1596	**
				1597	** SQLite uses only a single Mutex. There is not much critical
				1598	** code and what little there is executes quickly and without blocking.
				1599	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1600	static void unixEnterMutex(){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1601	#ifdef SQLITE_UNIX_THREADS
				1602	pthread_mutex_lock(&mutex);
				1603	#endif
				1604	assert( !inMutex );
				1605	inMutex = 1;
				1606	}
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1607	static void unixLeaveMutex(){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1608	assert( inMutex );
				1609	inMutex = 0;
				1610	#ifdef SQLITE_UNIX_THREADS
				1611	pthread_mutex_unlock(&mutex);
				1612	#endif
				1613	}
				1614
				1615	/*
drh	88f474a	2006-01-02 20:00:12 +0000	[diff] [blame]	1616	** Return TRUE if we are currently within the mutex and FALSE if not.
				1617	** This routine is intended for sanity checking only. It is designed
				1618	** for use in an assert() to verify that the mutex is held or not held
				1619	** in certain routines.
				1620	*/
				1621	static int unixInMutex(){
				1622	return inMutex;
				1623	}
				1624
				1625	/*
danielk1977	13a68c3	2005-12-15 10:11:30 +0000	[diff] [blame]	1626	** This function is called automatically when a thread exists to delete
				1627	** the threads SqliteTsd structure.
				1628	**
				1629	** Because the SqliteTsd structure is required by higher level routines
				1630	** such as sqliteMalloc() we use OsFree() and OsMalloc() directly to
				1631	** allocate the thread specific data.
				1632	*/
danielk1977	c529f52	2005-12-15 10:50:53 +0000	[diff] [blame]	1633	#ifdef SQLITE_UNIX_THREADS
danielk1977	13a68c3	2005-12-15 10:11:30 +0000	[diff] [blame]	1634	static void deleteTsd(void *pTsd){
danielk1977	c529f52	2005-12-15 10:50:53 +0000	[diff] [blame]	1635	sqlite3Os.xFree(pTsd);
danielk1977	13a68c3	2005-12-15 10:11:30 +0000	[diff] [blame]	1636	}
danielk1977	c529f52	2005-12-15 10:50:53 +0000	[diff] [blame]	1637	#endif
danielk1977	13a68c3	2005-12-15 10:11:30 +0000	[diff] [blame]	1638
				1639	/*
				1640	** The first time this function is called from a specific thread, nByte
				1641	** bytes of data area are allocated and zeroed. A pointer to the new
				1642	** allocation is returned to the caller.
				1643	**
				1644	** Each subsequent call to this function from the thread returns the same
				1645	** pointer. The argument is ignored in this case.
				1646	*/
				1647	static void *unixThreadSpecificData(int nByte){
				1648	#ifdef SQLITE_UNIX_THREADS
				1649	static pthread_key_t key;
				1650	static int keyInit = 0;
				1651	void *pTsd;
				1652
				1653	if( !keyInit ){
				1654	sqlite3Os.xEnterMutex();
				1655	if( !keyInit ){
				1656	int rc;
				1657	rc = pthread_key_create(&key, deleteTsd);
				1658	if( rc ){
				1659	return 0;
				1660	}
				1661	keyInit = 1;
				1662	}
				1663	sqlite3Os.xLeaveMutex();
				1664	}
				1665
				1666	pTsd = (SqliteTsd *)pthread_getspecific(key);
				1667	if( !pTsd ){
danielk1977	c529f52	2005-12-15 10:50:53 +0000	[diff] [blame]	1668	pTsd = sqlite3Os.xMalloc(sizeof(SqliteTsd));
danielk1977	13a68c3	2005-12-15 10:11:30 +0000	[diff] [blame]	1669	if( pTsd ){
				1670	memset(pTsd, 0, sizeof(SqliteTsd));
				1671	pthread_setspecific(key, pTsd);
				1672	}
				1673	}
				1674	return pTsd;
				1675	#else
				1676	static char tsd[sizeof(SqliteTsd)];
danielk1977	c529f52	2005-12-15 10:50:53 +0000	[diff] [blame]	1677	static int isInit = 0;
danielk1977	13a68c3	2005-12-15 10:11:30 +0000	[diff] [blame]	1678	assert( nByte==sizeof(SqliteTsd) );
				1679	if( !isInit ){
				1680	memset(tsd, 0, sizeof(SqliteTsd));
				1681	isInit = 1;
				1682	}
				1683	return (void *)tsd;
				1684	#endif
				1685	}
				1686
				1687	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1688	** The following variable, if set to a non-zero value, becomes the result
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1689	** returned from sqlite3Os.xCurrentTime(). This is used for testing.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1690	*/
				1691	#ifdef SQLITE_TEST
				1692	int sqlite3_current_time = 0;
				1693	#endif
				1694
				1695	/*
				1696	** Find the current time (in Universal Coordinated Time). Write the
				1697	** current time and date as a Julian Day number into *prNow and
				1698	** return 0. Return 1 if the time and date cannot be found.
				1699	*/
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1700	static int unixCurrentTime(double *prNow){
drh	19e2d37	2005-08-29 23:00:03 +0000	[diff] [blame]	1701	#ifdef NO_GETTOD
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1702	time_t t;
				1703	time(&t);
				1704	*prNow = t/86400.0 + 2440587.5;
drh	19e2d37	2005-08-29 23:00:03 +0000	[diff] [blame]	1705	#else
				1706	struct timeval sNow;
				1707	struct timezone sTz; /* Not used */
				1708	gettimeofday(&sNow, &sTz);
				1709	*prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
				1710	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1711	#ifdef SQLITE_TEST
				1712	if( sqlite3_current_time ){
				1713	*prNow = sqlite3_current_time/86400.0 + 2440587.5;
				1714	}
				1715	#endif
				1716	return 0;
				1717	}
				1718
/* IF_DISKIO(X) expands to X in a normal build.  When SQLITE_OMIT_DISKIO
** is defined it expands to 0 instead, so that table entries naming
** routines that do not exist without disk I/O become null entries. */
#ifndef SQLITE_OMIT_DISKIO
# define IF_DISKIO(X)  X
#else
# define IF_DISKIO(X)  0
#endif
				1726
				1727	/*
				1728	** This is the structure that defines all of the I/O routines.
				1729	*/
				1730	struct sqlite3OsVtbl sqlite3Os = {
				1731	IF_DISKIO( unixOpenReadWrite ),
				1732	IF_DISKIO( unixOpenExclusive ),
				1733	IF_DISKIO( unixOpenReadOnly ),
				1734	IF_DISKIO( unixDelete ),
				1735	IF_DISKIO( unixFileExists ),
				1736	IF_DISKIO( unixFullPathname ),
				1737	IF_DISKIO( unixIsDirWritable ),
				1738	IF_DISKIO( unixSyncDirectory ),
				1739	IF_DISKIO( unixTempFileName ),
				1740	unixRandomSeed,
				1741	unixSleep,
				1742	unixCurrentTime,
				1743	unixEnterMutex,
				1744	unixLeaveMutex,
drh	88f474a	2006-01-02 20:00:12 +0000	[diff] [blame]	1745	unixInMutex,
danielk1977	c529f52	2005-12-15 10:50:53 +0000	[diff] [blame]	1746	unixThreadSpecificData,
				1747	genericMalloc,
				1748	genericRealloc,
				1749	genericFree,
				1750	genericAllocationSize
drh	054889e	2005-11-30 03:20:31 +0000	[diff] [blame]	1751	};
				1752
				1753
				1754
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1755	#endif /* OS_UNIX */