/* * Copyright (c) 2010 Peter J. Philipp * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ /* * Don't use this program, but study it if you like. * This is a proof of concept program. It does what gzip does only with * space conservation in mind. When gzip compresses a file it writes the * compressed file out as it reads from the original file until EOF. It * then unlinks the original file. A lot of space is wasted on the disk * and if you run out of filesystem space you can't compress the file. * * My program first slices up the file in blocks and writes them reversed * (with a two byte trailer per block) to a second file, as it writes a new * block it truncates a block from the original. Then the file is written * as a gzip file as the second file is truncated per block. So at first * it grows a little bit, then it shrinks on the same filesystem space that * it was stored on at first. * * Shouldn't this be very fast on SSD drives? */ /* --- /dev/svnd0a 31.4M 31.2M -1.4M 105% /mnt # ls -l total 63904 -rw-r--r-- 1 root wheel 672449 Apr 24 22:52 Superfish.jpg -rw-r--r-- 1 root wheel 512000 Apr 24 22:54 blah -rw-r--r-- 1 root wheel 31457280 Apr 24 22:45 sparsefile # gzip Superfish.jpg /mnt: write failed, file system is full gzip: Superfish.jpg.gz: No space left on device # ls -l total 64164 -rw-r--r-- 1 root wheel 672449 Apr 24 22:52 Superfish.jpg -rw-r--r-- 1 root wheel 131096 Apr 24 22:52 Superfish.jpg.gz -rw-r--r-- 1 root wheel 512000 Apr 24 22:54 blah -rw-r--r-- 1 root wheel 31457280 Apr 24 22:45 sparsefile # rm *gz # ~pjp/compress Superfish.jpg # ls -l total 63520 -rw-r--r-- 1 root wheel 477699 Apr 24 22:55 Superfish.jpg.gz -rw-r--r-- 1 root wheel 512000 Apr 24 22:54 blah -rw-r--r-- 1 root wheel 31457280 Apr 24 22:45 sparsefile --- */ /* * After further research of this program it becomes evident that when it * hits the wall (No space left on device) it is next to impossible to * repair the file to be compressed. This is a downside and probably the * reason gzip wasn't made to be more like this one. This program is * dangerous. I wouldn't use it. */ #include #include #include #include #include #include #include #include #define BLOCKSIZE 512 void spool_back1(int fdin, int fdout, char *); int main(int argc, char *argv[]) { struct stat sb; int fdin, fdout; int blocksize; int destlen; off_t offset; char *tmpfile; char buf[BLOCKSIZE]; char cbuf[1024]; u_int16_t *bytes; gzFile *gzf; if (argc != 2) { fprintf(stderr, "usage: compress file\n"); exit(1); } fdin = open(argv[1], O_RDWR, 0); if (fdin < 0) { perror("open"); exit(1); } if (fstat(fdin, &sb) < 0) { perror("fstat"); close(fdin); exit(1); } snprintf(buf, sizeof(buf), "%s.gztmp", argv[1]); tmpfile = strdup(buf); fdout = open(buf, O_RDWR | O_CREAT | O_TRUNC, 0600); if (fdout < 0) { perror("open"); close(fdin); exit(1); } while (1) { offset = lseek(fdin, 0, SEEK_END); if (offset - BLOCKSIZE < 0) { blocksize = offset; offset = 0; } else { blocksize = BLOCKSIZE; offset -= BLOCKSIZE; } lseek(fdin, offset, SEEK_SET); if (read(fdin, buf, blocksize) != blocksize) { perror("read"); /* spool back */ spool_back1(fdin, fdout, tmpfile); exit(1); } memcpy(cbuf, buf, blocksize); bytes = (u_int16_t *)&cbuf[blocksize]; *bytes = blocksize; if (write(fdout, cbuf, blocksize + 2) < 0) { perror("write"); /* spool back */ spool_back1(fdin, fdout, tmpfile); exit(1); } lseek(fdin, offset, SEEK_SET); ftruncate(fdin, offset); if (offset == 0) break; } close(fdin); unlink(argv[1]); snprintf(buf, sizeof(buf), "%s.gz", argv[1]); gzf = gzopen(buf, "w"); while (1) { offset = lseek(fdout, 0, SEEK_END); lseek(fdout, offset - 2, SEEK_SET); if (read(fdout, buf, 2) < 0) { perror("read 2"); /* spool back */ exit(1); } bytes = (u_int16_t *)&buf[0]; offset = lseek(fdout, 0, SEEK_END); offset -= (*bytes + 2); lseek(fdout, offset, SEEK_SET); if (read(fdout, cbuf, *bytes) < 0) { perror("read 3"); exit(1); } gzwrite(gzf, cbuf, *bytes); offset = lseek(fdout, 0, SEEK_END); offset -= (*bytes + 2); offset = lseek(fdout, offset, SEEK_SET); ftruncate(fdout, offset); if (offset == 0) break; } close(fdout); snprintf(buf, sizeof(buf), "%s.gztmp", argv[1]); unlink(buf); gzclose(gzf); } void spool_back1(int fdin, int fdout1, char *tmpfile) { struct stat sb; char buf[BLOCKSIZE + 2]; off_t offset; u_int16_t *bytes; int fdout; int len; snprintf(buf, sizeof(buf), "/tmp/%s", tmpfile); fdout = open(buf, O_RDWR | O_CREAT | O_TRUNC, 0600); if (fdout < 0) { perror("open"); exit(1); } lseek(fdout1, 0, SEEK_SET); while ((len = read(fdout1, buf, sizeof(buf))) != 0) { write(fdout, buf, len); } close(fdout1); unlink(tmpfile); while (1) { offset = lseek(fdout, 0, SEEK_END); lseek(fdout, offset - 2, SEEK_SET); if (read(fdout, buf, 2) < 0) { perror("read"); return; } bytes = (u_int16_t *)&buf[0]; offset = lseek(fdout, 0, SEEK_END); offset -= (*bytes + 2); lseek(fdout, offset, SEEK_SET); if (read(fdout, buf, *bytes) < 0) { perror("read"); return; } if (write(fdin, buf, *bytes) < 0) { perror("write"); return; } offset = lseek(fdout, 0, SEEK_END); offset -= (*bytes + 2); offset = lseek(fdout, offset, SEEK_SET); ftruncate(fdout, offset); if (offset == 0) break; } return; }