Zerotools - Source code

Copyright 2006, 2007 Aleksandr Koltsoff

Table of contents

Document conventions

The following markup list will aid you in understanding the markup used in this document:

This document was prepared and generated using DOCSE, an automated document preparation system in fluid motion (not released to the public).

Source code for zerotools

Feel free to browse the source code and even click on the links. Unfortunately, the TR-parser is not yet complete, so references to glibc routines are not activated in these listings. For an example of how that would work, please see the synhilite project, which has active references to the GTK+ documentation.

This page is generated automatically upon each zerotools update so it always reflects the current version.

Please also note that although the source files are released under the GPL, this does not mean that this specific page is.

  1 /**
  2  * Part of zerotools (http://koltsoff.com/pub/zerotools/)
  3  *
  4  * Small utility to remove files but zeroing their contents
  5  * first. Mainly useful in virtual machines when dealing with
  6  * virtual disks.
  7  *
  8  * Copyright 2006, 2007 Aleksandr Koltsoff (czr@iki.fi)
  9  *
 10  * Released under the GNU General Public License (version 2). See the
 11  * attached COPYING file. Should the file be missing, please
 12  * see http://www.gnu.org/licenses/gpl.txt
 13  *
 14  * For each argument:
 15  * - stat:
 16  *   - if non-directory but not regular file, unlink directly
 17  *   - if regular file proceed with:
 18  *     - open file (for writing only, but do not truncate)
 19  *     - write zeroes up to the length of file (and sync writes at end)
 20  *     - empty files will not be filled
 21  *     - files with link count > 1 won't be filled
 22  *   - non-directories will be all unlinked at the end
 23  */
 24 
 25 #include <stdio.h>
 26 #include <sys/types.h>  // lstat, open
 27 #include <sys/stat.h>   // lstat, open
 28 #include <fcntl.h>      // open
 29 #include <unistd.h>     // lstat, close, write, unlink, fdatasync
 30 #include <string.h>     // memset
 31 #include <errno.h>      // errno
 32 
 33 #define BUFFER_SIZE (128*1024)
 34 
 35 /**
 36  * Opens a file in write mode and then fills it with
 37  * zero bytes
 38  * Writes out from stack
 39  */
 40 static void zeroFile(const char* path, long long count) {
 41   int fd;
 42   char buffer[BUFFER_SIZE];
 43   // number of bytes written so far
 44   ssize_t writtenCount = 0;
 45 
 46   memset(buffer, 0, sizeof(buffer));
 47 
 48   fd = open(path, O_WRONLY);
 49 
 50   if ((fd == -1) && (errno == EACCES)) {
 51     // file might be missing write access
 52     // attempt to change mode to -w--w--w-
 53     if (chmod(path, S_IWUSR | S_IWGRP | S_IWOTH) == -1) {
 54       fprintf(stderr, NAME ": Failed to enable write-access to %s\n", path);
 55     } else {
 56       // attempt to open the file again
 57       // NOTE: failures will be handled below
 58       fd = open(path, O_WRONLY);
 59     }
 60   }
 61 
 62   if (fd == -1) {
 63     perror(NAME ": Failed open");
 64     return;
 65   }
 66   //printf("Opened ok\n");
 67   while (writtenCount < count) {
 68     // number of bytes to write this iteration
 69     ssize_t writeCount = count - writtenCount;
 70     // number of bytes that we wrote this iteration
 71     ssize_t wroteCount;
 72 
 73     // limit writeCount to buffer size
 74     if (writeCount > sizeof(buffer)) {
 75       writeCount = sizeof(buffer);
 76     }
 77 
 78     wroteCount = write(fd, buffer, writeCount);
 79     if (wroteCount != writeCount) {
 80       // if syscall was interrupted (no data was consumed), retry
 81       // the syscall
 82       if ((wroteCount == -1) && (errno == EINTR)) {
 83         // since no write was done, adjust for the decrement
 84         // that we'll hit shortly so that we end up writing the
 85         // proper amount of zeroes anyway
 86         writtenCount -= writeCount;
 87       } else {
 88         // some other error (just print message and don't
 89         // attempt to write any more zeroes)
 90         fprintf(stderr, NAME
 91           ": Failed to write data (wrote %lld, instead of wanted %lld)\n",
 92           (long long)wroteCount, (long long)writeCount);
 93         fprintf(stderr, NAME ": Proceeding with unlink\n");
 94         break;
 95       }
 96     }
 97     writtenCount += wroteCount;
 98   }
 99   // synchronize the zeroing to disk (at least attempt to).
100   // if this is not done, Linux kernel won't flush the outstanding
101   // writes to disk on file close if the writes are still in page cache
102   // on close (this is the case with small files).
103   // NOTE: there is precious little we can do on errors so
104   //       they're not checked for here.
105   fdatasync(fd);
106   // remove name from the file
107   if (unlink(path) != 0) {
108     fprintf(stderr, NAME ": Failed to unlink %s\n", path);
109   }
110   // close the file in any case. again no error checking since we can't
111   // do anything with them anyway.
112   close(fd);
113 }
114 
115 /**
116  * Main program:
117  * for each arg:
118  * 1) stat
119  * 2) if looks sane, fill
120  * 3) unlink if non-dir
121  */
122 int main(int argc, char** argv) {
123 
124   int i;
125 
126   if (argc < 2) {
127     fprintf(stderr, "USAGE: " NAME " path[s]\n"
128       NAME " will process each file in turn and\n"
129       "overwrite the contents of all regular files specified\n"
130       "with binary zero.\n"
131       "It will unlink all non-directories (even filled regulars)\n");
132     return 1;
133   }
134 
135   // iterate over the parameters
136   for (i = 1; i < argc; i++) {
137     struct stat stbuf;
138     int err;
139 
140     // get inode info behind the name
141     err = lstat(argv[i], &stbuf);
142     if (err != 0) {
143       // we'll fail in this case
144       // (should we continue anyway? no)
145       perror(NAME ": Failed in statting");
146       return 2;
147     }
148 
149     // check for regular files which are not empty
150     if ((S_ISREG(stbuf.st_mode)) && (stbuf.st_size > 0)) {
151       // fill only when there's only one link to the file
152       if (stbuf.st_nlink == 1) {
153         // process the file
154         zeroFile(argv[i], stbuf.st_size);
155         printf(NAME ": Zeroed and removed %s\n", argv[i]);
156       } else {
157         printf(NAME ": %s has more than one link, unlinking only\n",
158           argv[i]);
159         if(unlink(argv[i]) != 0) {
160           perror("Failed to unlink file\n");
161         }
162       }
163     } else {
164       // unlink it if it is not a directory
165       if (!S_ISDIR(stbuf.st_mode)) {
166         err = unlink(argv[i]);
167         if (err != 0) {
168           err = errno;
169           fprintf(stderr, NAME ": Failed to remove %s\n", argv[i]);
170           errno = err;
171           perror(NAME);
172         } else {
173           printf(NAME ": Unlinked %s\n", argv[i]);
174         }
175       } else {
176         fprintf(stderr, NAME ": %s is a directory, skipping\n",
177           argv[i]);
178       }
179     }
180   }
181 
182   return 0;
183 }

Listing 1: Source of temp/zerofile.c

  1 /**
  2  * Part of zerotools (http://koltsoff.com/pub/zerotools/)
  3  *
  4  * A wrapper for unlink which will fill files to be unlinked
  5  * with zero first, and then issue the real unlink. Tries to
  6  * be extra careful about everything.
  7  *
  8  * Example use:
  9  * ZEROFILE_VERBOSE=2 LD_PRELOAD=./zerounlink.so rm file
 10  *
 11  * Copyright 2006, 2007 Aleksandr Koltsoff (czr@iki.fi)
 12  *
 13  * Released under the GNU General Public License (version 2). See the
 14  * attached COPYING file. Should the file be missing,
 15  * please see http://www.gnu.org/licenses/gpl.txt
 16  */
 17 
 18 // 128KiB seems to be pretty efficient size for block I/O
 19 #define ZEROFILE_BUFFER_SIZE (128*1024)
 20 
 21 // the verbosity selector
 22 #define ZEROFILE_VERBOSE_ENVVAR ("ZEROFILE_VERBOSE=")
 23 
 24 // if makefile passed this, then we will only try to dlopen
 25 // the user-specified library. otherwise we try to use
 26 // automatic detection.
 27 #ifndef ZEROFILE_LIBC_PATH
 28   // get dladdr (and in the process environ)
 29   #define _GNU_SOURCE
 30   // get definition of Dl_info if not using static libc path
 31   #define __USE_GNU
 32   // we try to locate libc using this symbol
 33   // if you are using this source code to
 34   // provide a replacement for "div" yourself,
 35   // you'll have to find a better suiting symbol
 36   // no quotes allowed around the symbol here!
 37   #define ZEROFILE_LIBC_LOCATION_SYMBOL div
 38   // this is used for display purposes so you might want
 39   // to change this to reflect the real symbol as well
 40   #define ZEROFILE_LIBC_LOCATION_SYMBOL_NAME "div"
 41 #endif
 42 
 43 /**
 44  * END OF USER SERVICEABLE CONFIGURATION
 45  */
 46 
 47 #include <stdio.h>      // printf, perror
 48 #include <sys/types.h>  // open, pid_t
 49 #include <sys/stat.h>   // open
 50 #include <fcntl.h>      // open
 51 #include <errno.h>      // the global errno-variable
 52 #include <unistd.h>     // unlink, lstat, getpid
 53 #include <dlfcn.h>      // dynamic linker lib
 54 #include <stdlib.h>     // exit, atoi, "div"
 55 #include <string.h>     // memset
 56 #include <pthread.h>    // pthread_mutex_lock/unlock/initializer
 57 
 58 #ifdef ZEROFILE_LIBC_PATH
 59 // define environ since _GNU_SOURCE is not defined
 60 extern const char** environ;
 61 #endif
 62 
 63 // varags macro for debug printing
 64 // (can't use the C99 version because lack of ##)
 65 #define ZEROFILE_PRINT1(s, args...) if (zf_verbose >= 1) \
 66   fprintf(stderr, NAME ": " s, ## args)
 67 #define ZEROFILE_PRINT2(s, args...) if (zf_verbose >= 2) \
 68   fprintf(stderr, NAME ": " s, ## args)
 69 #define ZEROFILE_PRINT3(s, args...) if (zf_verbose >= 3) \
 70   fprintf(stderr, NAME ": " s, ## args)
 71 // special version that will stringify the s
 72 // used only from one location but needs to be a macro
 73 #define ZEROFILE_PRINT3_S1(s, s1) \
 74   ZEROFILE_PRINT3(s, #s1)
 75 
 76 /**
 77  * Check whether we should be verbose and returns the verbosity
 78  * level based on contents of ZEROFILE_VERBOSE_ENVVAR. The
 79  * verbosity level is cumulative (level 3 includes levels 2 and
 80  * 1).
 81  *
 82  * Returns:
 83  * 0 : no messages (no errors either)
 84  *     also used when envvar is missing completely
 85  * 1 : only errors are shown if they occur
 86  * 2 : show unlink information
 87  *     default with envvar but without level (empty envvar)
 88  * 3 : show all messages (full debug)
 89  */
 90 static int zf_getVerbosityLevel(void) {
 91   int i = 0;
 92 
 93   while (environ[i] != NULL) {
 94     if (strncmp(environ[i],
 95                 ZEROFILE_VERBOSE_ENVVAR,
 96                 strlen(ZEROFILE_VERBOSE_ENVVAR)) == 0) {
 97       // start with default level when we have the envvar
 98       int level = 2;
 99       // decoded binary of envvar contents
100       long int tlevel = -1;
101       // used to check whether decoding succeeds
102       char* endptr;
103 
104       // we don't bother with envvars which are empty
105       if (environ[i][strlen(ZEROFILE_VERBOSE_ENVVAR)] != '\0') {
106         tlevel = strtol(&environ[i][strlen(ZEROFILE_VERBOSE_ENVVAR)],
107                         &endptr, 10);
108         // check if whole string was processed
109         if (*endptr == '\0') {
110           // only allow level setting if the parsed value is within
111           // possible limits
112           if ((tlevel >= 0) && (tlevel <= 3)) {
113             level = tlevel;
114           }
115         }
116       }
117       return level;
118     }
119     // this wasn't the envvar we were looking for. try next one.
120     i++;
121   }
122   return 0;
123 }
124 
125 // protects the critical section where setup code lives.
126 // the setup code must only be run once
127 static pthread_mutex_t zf_setupMutex = PTHREAD_MUTEX_INITIALIZER;
128 // verbosity indicator (see zf_getVerbosityLevel)
129 static int zf_verbose = 0;
130 
131 /**
132  * This version will:
133  * - do an lstat first
134  * - if link count == 1 and is a regular file:
135  *   - open the file for writing only, fill it with zero
136  *     close it
137  * - call the real unlink
138  */
139 int unlink(const char* pathname) {
140 
141   // handle to libc (also we use it to determine
142   // whether we have inited yet or not)
143   static volatile void* dlHandle = NULL;
144   static int (*realUnlink)(const char*); // ptr to original
145   // this buffer is static on purpose (it's filled with zeroes
146   // only once on lib init)
147   static char zeroBuffer[ZEROFILE_BUFFER_SIZE];
148 
149   struct stat stbuf;
150   int err;
151 
152   // setup is rather long-winded process, so we'll do it only
153   // once. we also have a critical section against multiple
154   // threads trying to init the lib each time. So, using local
155   // static storage shouldn't be such a big problem.
156   if (dlHandle == NULL) {
157     char* errMsg = NULL;
158 
159     // it's safe to call this from two threads at once
160     zf_verbose = zf_getVerbosityLevel();
161     ZEROFILE_PRINT3("SETUP: Running verbose, starting init-phase"
162       " for process %u)\n", (unsigned)getpid());
163 
164     // lock
165     pthread_mutex_lock(&zf_setupMutex);
166 
167     // CRITICAL SECTION STARTS HERE
168     // (any code returning/gotoing from within needs to unlock
169     //  the mutex implicitly)
170 
171     ZEROFILE_PRINT3("SETUP: Starting critical\n");
172     // check whether another thread already setup the lib for
173     // us. ENTER1-SL1-ENTER2-WAKE1 problem
174     if (dlHandle != NULL) {
175       ZEROFILE_PRINT3(
176         "SETUP: NOTICE: another thread setup the lib already\n");
177     } else {
178       ZEROFILE_PRINT3(
179         "SETUP: (doing init as the first thread)\n");
180 
181       ZEROFILE_PRINT3(
182         "SETUP: Opening libc to setup a pointer to real unlink\n");
183 
184       // we have two choices when selecting the library from
185       // which we try to get the real 'unlink':
186       // - either we try to find absolute path to library
187       //   that _should_ contain it. we'll use the
188       //   symbol of 'div' in order to find libc.
189       // - user has supplised a hard coded path to the
190       //   library to use (ZEROFILE_LIBC_PATH, passed from
191       //   the Makefile normally)
192       //
193       // In both cases we'll use lazy symbol binding to
194       // speed up things (and not to complicate matters).
195 #ifdef ZEROFILE_LIBC_PATH
196       ZEROFILE_PRINT3(
197         "SETUP: Using hard-coded path '%s' for libc\n",
198         ZEROFILE_LIBC_PATH);
199       dlHandle = dlopen(ZEROFILE_LIBC_PATH, RTLD_LAZY);
200 #else
201       ZEROFILE_PRINT3(
202         "SETUP: Finding library owning the symbol '"
203         ZEROFILE_LIBC_LOCATION_SYMBOL_NAME "'\n");
204       {
205         // dladdr will store results here
206         Dl_info dlInfo;
207 
208         // dladdr returns non-zero for success
209         if (dladdr(&ZEROFILE_LIBC_LOCATION_SYMBOL, &dlInfo) != 0) {
210           ZEROFILE_PRINT3(
211             "SETUP:  Owner: '%s'\n", dlInfo.dli_fname);
212           // we'll then pass dli_fname to dlopen
213           dlHandle = dlopen(dlInfo.dli_fname, RTLD_LAZY);
214         } else {
215           ZEROFILE_PRINT1(
216             "SETUP:  Failed to locate libc. Please supply a "
217             "hard-coded path using zf_libc_path in the "
218             "Makefile\n");
219           goto error;
220         }
221       }
222 #endif
223       if (dlHandle == NULL) {
224         ZEROFILE_PRINT1(
225           "SETUP: FAILURE: Failed to dl libc: '%s'\n", dlerror());
226         // quit with an error (will unlock as well)
227         goto error;
228       }
229 
230       ZEROFILE_PRINT3("SETUP: Getting address of real unlink\n");
231       // clear error since dlsym might return NULLs for symbols)
232       dlerror();
233       // need an extra typecast here since we have defined this
234       // as a volatile type
235       realUnlink = dlsym((void*)dlHandle, "unlink");
236       errMsg = dlerror();
237       if (errMsg != NULL) {
238         ZEROFILE_PRINT1(
239           "SETUP: FAILURE:  Problem getting symbol 'unlink': '%s'\n",
240           errMsg);
241         // quit with an error (will unlock as well)
242         goto error;
243       } else {
244         ZEROFILE_PRINT3("SETUP:  Real unlink at %p\n", realUnlink);
245       }
246 
247       ZEROFILE_PRINT3("SETUP: Preparing zero-buffer\n");
248 
249       memset(zeroBuffer, 0, sizeof(zeroBuffer));
250 
251       ZEROFILE_PRINT3("SETUP: Ready for operation\n");
252     }
253 
254     // CRITICAL SECTION ENDS HERE
255     pthread_mutex_unlock(&zf_setupMutex);
256   }
257   ZEROFILE_PRINT3("Invoked with ('%s')\n", pathname);
258 
259   // do the stat first
260   err = lstat(pathname, &stbuf);
261   if (err == 0) {
262     // stat was ok
263     if (S_ISREG(stbuf.st_mode) &&
264          (stbuf.st_nlink == 1) &&
265           (stbuf.st_size != 0)) {
266       // number of bytes we have written so far
267       ssize_t writtenCount = 0;
268       int fd;
269 
270       ZEROFILE_PRINT2(
271         "ZEROING %s (%lld bytes)\n", pathname,
272         (long long)stbuf.st_size);
273 
274       fd = open(pathname, O_WRONLY);
275       if ((fd == -1) && (errno == EACCES)) {
276         // file might be missing write access
277         // attempt to place write-onlys on file
278         ZEROFILE_PRINT2("Attempting to switch file mode to all-writes for %s\n", pathname);
279 
280         if (chmod(pathname, S_IWUSR | S_IWGRP | S_IWOTH) == -1) {
281           ZEROFILE_PRINT1("Failed to force write allowed on %s\n", pathname);
282         } else {
283           // mode changed. now retry opening file
284           fd = open(pathname, O_WRONLY);
285         }
286       }
287 
288       // if we failed in opening the file for write (even after switching mode),
289       // there's not much we can do
290       if (fd == -1) {
291         ZEROFILE_PRINT1(
292           "FAILURE: Failed to open '%s' for writing"
293           " (no zeroing will be done)\n", pathname);
294         // need to break out to real unlink
295         return realUnlink(pathname);
296       }
297 
298       // unlink the file here so that we limit the amount of
299       // time that the unlinked file is visible to other
300       // processes/threads
301       err = realUnlink(pathname);
302       ZEROFILE_PRINT3(
303         "Real unlink returns %d, starting fill\n", err);
304 
305       // keep writing zeroes until the file has been filled
306       while (writtenCount < stbuf.st_size) {
307         ssize_t writeCount = stbuf.st_size - writtenCount;
308         ssize_t wroteCount = 0;
309 
310         // clip to max buffer size
311         if (writeCount > ZEROFILE_BUFFER_SIZE) {
312           writeCount = ZEROFILE_BUFFER_SIZE;
313         }
314 
315         wroteCount = write(fd, zeroBuffer, writeCount);
316         if (wroteCount != writeCount) {
317           if ((wroteCount == -1) && (errno == EINTR)) {
318             // write-syscall was interrupted before it used any data
319             // fixup writtenCount so that next write will handle
320             // this same iteration again
321             writtenCount -= writeCount;
322           } else {
323             ZEROFILE_PRINT1(
324               "FAILURE: Failed to write all necessary bytes "
325               "(wrote %lld instead of %lld)\n",
326               (long long)wroteCount, (long long)writeCount);
327             // bugfix 1: when error is encountered, break out
328             // on the first error, don't spin here forever
329             break;
330           }
331         }
332         writtenCount += wroteCount;
333       }
334 
335       // synchronize outstanding writes from page cache
336       // NOTE: we don't handle errors since there's not much
337       //       we can do anyway
338       fdatasync(fd);
339 
340       ZEROFILE_PRINT3("Closing and returning err=%d\n", err);
341       close(fd);
342       // we return already here with the real unlink retcode
343       return err;
344     }
345   } else {
346     ZEROFILE_PRINT1(
347       "Failed to stat '%s', proceeding with normal unlink\n",
348       pathname);
349   }
350   // this code path is executed for non-regular files as well
351   // as zero size regular ones
352 
353   // do the real unlink
354   err = realUnlink(pathname);
355   ZEROFILE_PRINT3("Real unlink returns %d\n", err);
356   return err;
357 
358 // error handling from setup when we fail to dl stuff
359 error:
360 
361   ZEROFILE_PRINT3("SETUP: Executing common error unrolling\n");
362 
363   // if dlHandle is available, close it
364   if (dlHandle != NULL) {
365     ZEROFILE_PRINT3("SETUP: closing dl to libc\n");
366     dlclose((void*)dlHandle);
367     dlHandle = NULL;
368   }
369 
370   // unlock the critical mutex
371   pthread_mutex_unlock(&zf_setupMutex);
372 
373   ZEROFILE_PRINT3("SETUP: Critical region exited\n");
374 
375   // setup errno to caller (we can't unlink anyway)
376   errno = EINVAL;
377   return -1;
378 }

Listing 2: Source of temp/zerounlink.c

  1 /**
  2  * Part of zerotools (http://koltsoff.com/pub/zerotools/)
  3  *
  4  * Wrapper helper to go with zerounlink.so.
  5  * Replaces shell scripts that use LD_PRELOAD and also makes
  6  * it easier to configure which programs will go through the
  7  * wrapper.
  8  *
  9  * Please see comments at start of main for a rough overview
 10  * what this program attempts to do.
 11  *
 12  * Copyright 2006, 2007 Aleksandr Koltsoff (czr@iki.fi)
 13  *
 14  * Released under the GNU General Public License (version 2). See the
 15  * attached COPYING file. Should the file be missing,
 16  * please see http://www.gnu.org/licenses/gpl.txt
 17  */
 18 
 19 #include <stdio.h>
 20 #include <sys/types.h>  // stat
 21 #include <sys/stat.h>   // stat
 22 #include <unistd.h>     // execve, access, stat
 23 #include <string.h>     // strncmp
 24 #include <stdlib.h>     // exit
 25 #include <errno.h>      // errno
 26 #include <limits.h>     // PATH_MAX
 27 
 28 extern char** environ; // the environment we were called with
 29 
 30 // absolute path to the wrapper shared object to use.
 31 // this comes from outside world (Makefile will give ZF_SONAME)
 32 #define ZEROFILE_WRAPPER_PATH ZF_SONAME
 33 
 34 
 35 /**
 36  * Utility function to create/modify an environment that will
 37  * be passed to ld.so executing the target executable (i.e.,
 38  * the program that user actually wanted to run).
 39  *
 40  * There are two cases:
 41  * - environment already has LD_PRELOAD:
 42  *   - we add the new .so after the existing ones.
 43  *     referring to the same .so is not harmful it seems, so
 44  *     we don't check whether it is already present (bdev+ino
 45  *     checks could be used for that)
 46  * - environment doesn't have LD_PRELOAD:
 47  *   - we create the new variable
 48  *   - we allocate memory for a new environment that will
 49  *     hold the old one and the new preload-envvar at the end
 50  * Return value is the pointer to the "new" envvar-table
 51  * (in the first case we just reuse our env-table, so it's not
 52  * new).
 53  */
 54 static char** makeNewEnviron(void) {
 55   // index at which existing LD_PRELOAD is (-1 if none)
 56   int preloadIndex = -1;
 57   // current index that we test in environ[]
 58   int i = 0;
 59 
 60   // locate the position of existing LD_PRELOAD (if any)
 61   while (environ[i] != NULL) {
 62     if (strncmp(environ[i],
 63                 "LD_PRELOAD=",
 64                 strlen("LD_PRELOAD=")) == 0) {
 65       // hit preload
 66       preloadIndex = i;
 67     }
 68     i++;
 69   }
 70   // at this point i is number valid entries in environ[]
 71   // (inclusive of the terminating NULL)
 72 
 73   // if LD_PRELOAD exists, we need to manufacture a new one
 74   // which will also contain our .so file
 75   if (preloadIndex != -1) {
 76     int preloadLen = strlen(environ[preloadIndex]);
 77     char* newPreload = NULL;
 78 
 79     // allocate enough space to hold the new string
 80     // 1 for space, 1 for terminator (=2)
 81     newPreload = malloc(preloadLen+2+
 82                         strlen(ZEROFILE_WRAPPER_PATH));
 83     // OOM? report to caller which will report it to user
 84     if (newPreload == NULL) return NULL;
 85 
 86     // time to manufacture the new one
 87     sprintf(newPreload, "%s %s", environ[preloadIndex],
 88             ZEROFILE_WRAPPER_PATH);
 89 
 90     // replace the old one in the environ with the new one
 91     environ[preloadIndex] = newPreload;
 92     // return the modified table to caller
 93     return environ;
 94   } else {
 95     // we'll need to create a new environ table since we
 96     // need one additional envvar
 97     char** newEnviron = NULL;
 98     char* newPreload = NULL;
 99 
100     // alloc mem for new table (including new entry)
101     newEnviron = malloc(sizeof(char*)*(i+1));
102     // alloc mem for the new LD_PRELOAD envvar
103     newPreload = malloc(strlen("LD_PRELOAD=")+
104                         strlen(ZEROFILE_WRAPPER_PATH)+1);
105     // OOM causes break to user with error
106     if ((newEnviron == NULL) || (newPreload == NULL)) {
107       return NULL;
108     }
109 
110     // create the envvar contents
111     sprintf(newPreload, "LD_PRELOAD=%s", ZEROFILE_WRAPPER_PATH);
112     // copy pointers in current environ to the new one
113     memcpy(newEnviron, environ, sizeof(char*)*i);
114 
115     // add new entry at the end
116     newEnviron[i] = newPreload;
117     newEnviron[i+1] = NULL;
118 
119     return newEnviron;
120   }
121 }
122 
// PATH_MAX is not guaranteed to be provided by <limits.h>, so make
// sure that there is some value to size the candidate buffer with
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif

/**
 * Utility function that replicates the "shell" program
 * search logic using PATH envvar with a twist. The shell
 * logic is to test the given basename in each PATH component
 * until a suitable one is found.
 *
 * Since it's possible that the PATH contains the directory
 * from which this program was started, we'll stat each
 * candidate to check whether the bdev+ino are the same as
 * this program (ourSt) and ignore such entries. This means that
 * we'll never end up in an infinite loop starting ourselves.
 *
 * Empty PATH components are ignored, as are candidates that would
 * not fit into the result buffer and candidates that are not regular
 * files (after following symlinks).
 *
 * ourSt:    stat result of the running executable
 * basename: name of the program to look for
 *
 * Returns a pointer to the found executable. The
 * name lives in static local storage so this function is
 * not thread-safe. On the other hand this application will
 * never use threads, so it's not a problem.
 *
 * Will return NULL if PATH doesn't exist or no suitable
 * executable candidate was found.
 */
static char* findProgram(const struct stat* ourSt,
                         const char* basename) {
  // this is the storage that will be "visible" to caller
  static char trypath[PATH_MAX*2];
  // value of PATH (NULL if there is none)
  const char* path = getenv("PATH");
  // start of the path component being processed
  const char* comp = path;

  if (path == NULL) {
    return NULL;
  }

  // process one path component at a time: glue '/basename' to it,
  // then test that it's not ourselves and that the target is
  // executable by us. the first candidate that satisfies the
  // criteria is returned (just like shell).
  while (*comp != '\0') {
    // pointer to the next : in PATH (NULL for the last component)
    const char* colonAt = strchr(comp, ':');
    // length in bytes for the path component. this has to be a wide
    // type, components may well be longer than what fits in a char
    size_t compLen;

    if (colonAt == NULL) {
      compLen = strlen(comp);
    } else {
      compLen = (size_t)(colonAt - comp);
    }

    // the upper limit also makes the int cast for %.*s safe
    if ((compLen > 0) && (compLen < sizeof(trypath))) {
      // we'll store the stat result for each candidate here
      struct stat stbuf;
      // manufacture a path to try
      int len = snprintf(trypath, sizeof(trypath), "%.*s/%s",
                         (int)compLen, comp, basename);

      // a truncated candidate would name some other file, skip those
      if ((len > 0) && ((size_t)len < sizeof(trypath)) &&
          (stat(trypath, &stbuf) == 0)) {
        int isOurself = (stbuf.st_dev == ourSt->st_dev) &&
                        (stbuf.st_ino == ourSt->st_ino);

        // it has to be a regular file which is not the same file as
        // from which we started from, and we must be allowed to
        // execute it
        if (!isOurself && S_ISREG(stbuf.st_mode) &&
            (access(trypath, X_OK) == 0)) {
          return trypath;
        }
      }
    }

    if (colonAt == NULL) {
      // that was the last component
      break;
    }
    // advance to next path component
    comp = colonAt + 1;
  }
  // we only get here if we didn't find anything or path is
  // empty so return NULL
  return NULL;
}
240 
241 /**
242  * The main prog:
243  * 1) Get bdev+ino for this executable
244  * 2) Isolate basename
245  * 3) Using PATH, find target executable using 1) and 2)
246  * 4) Prepare environment (LD_PRELOAD)
247  * 5) Execve to target executable
248  */
249 int main(int argc, char** argv) {
250 
251   // the result of doing a stat on this executable
252   struct stat stbuf;
253   // path to the target executable once we find it
254   char* targetPath = NULL;
255   // ptr to the basename part of argv[0]
256   char* basename = NULL;
257   // ptr to new environment (after modifications)
258   char** newEnviron = NULL;
259 
260   // we're gonna be evil here and bind this
261   // to Linux. otherwise it's not easy to get the bdev+ino
262   // pair from the executing process. we'll use this
263   // pair to avoid loops due to PATH-weirdness or sloppyness
264   if (stat("/proc/self/exe", &stbuf) != 0) {
265     // this really shouldn't happen on a normal system.
266     // one case where it's possible is where /proc isn't
267     // mounted or for security reasons a process cannot
268     // access proc (even the entry describing itself)
269     perror(NAME ": Can't stat ourself (/proc/self/ missing?)");
270     return 1;
271   }
272 
273   // isolate the basename
274   // start from end of argv[0] and go back until first slash
275   // is found. if no slash is found, then we take the whole
276   // thing as basename
277   basename = strrchr(argv[0], '/');
278   if (basename == NULL) {
279     basename = argv[0];
280   } else {
281     basename++;
282   }
283 
284   // locate the suitable target executable using basename and
285   // PATH.
286   targetPath = findProgram(&stbuf, basename);
287   if (targetPath == NULL) {
288     fprintf(stderr, NAME ": Cannot locate '%s' in PATH!\n",
289             basename);
290     errno = ENOENT;
291     perror(NULL);
292     return 2;
293   }
294 
295   // we don't need to touch argv
296   // infact, we want to target program to see the current
297   // argv[0] so that if it has some weird checks on which name
298   // was used to invoke it, it will work in all cases. shell
299   // scripts seem to be special in getting their argv[0].
300 
301   // next we need to prepare the environ for the new program
302   // namely we need to add the LD_PRELOAD or modify the
303   // existing one to contain the new entry
304   newEnviron = makeNewEnviron();
305   if (newEnviron == NULL) {
306     fprintf(stderr, NAME ": failed to allocate memory\n");
307     return 3;
308   }
309 
310   // load the image of target executable on top of this
311   // process (irreversible when succeeds).
312   execve(targetPath, argv, newEnviron);
313   // we normally do not return from execve (but suppose that
314   // process table is full, or other badness happens, we
315   // will, although execve doesn't require a new entry in the
316   // process table since it will replace the existing process
317   // with a new program)
318   perror(NAME ": failed to start real process");
319   return 4;
320 }

Listing 3: Source of temp/zerounlink-wrapper.c