du: Dedup hardlinks - sbase - suckless unix tools | |
git clone git://git.suckless.org/sbase | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit e8fe04c543eab13e892beda05cb9bccb9e4f441e | |
parent 39a4c55378294437627421571a51b64bd5e09623 | |
Author: remph <[email protected]> | |
Date: Mon, 3 Mar 2025 19:52:37 +0100 | |
du: Dedup hardlinks | |
Conform to POSIX, which says `Files with multiple links shall be counted | |
and written for only one entry,' in the 2008[1] and 2013[2] editions, and | |
uses more words to say the same thing in the 2017[3] and 2024[4] editions. | |
This patch also keeps inodes between operands and dedups symlinks if | |
applicable, which are implementation-defined in 2017 and required in 2024. | |
See also the `RATIONALE' section in the 2024 edition. | |
[1] https://pubs.opengroup.org/onlinepubs/9699919799.2008edition/utilities/du.html | |
[2] https://pubs.opengroup.org/onlinepubs/9699919799.2013edition/utilities/du.html | |
[3] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/du.html | |
[4] https://pubs.opengroup.org/onlinepubs/9799919799/utilities/du.html | |
Diffstat: | |
M du.c | 53 +++++++++++++++++++++++++++++… | |
1 file changed, 52 insertions(+), 1 deletion(-) | |
--- | |
diff --git a/du.c b/du.c | |
@@ -5,6 +5,7 @@ | |
#include <errno.h> | |
#include <fcntl.h> | |
#include <limits.h> | |
+#include <search.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
@@ -20,6 +21,11 @@ static int aflag = 0; | |
static int sflag = 0; | |
static int hflag = 0; | |
+struct file { | |
+ dev_t devno; | |
+ ino_t inode; | |
+}; | |
+ | |
static void | |
printpath(off_t n, const char *path) | |
{ | |
@@ -35,16 +41,61 @@ nblks(blkcnt_t blocks) | |
return (512 * blocks + blksize - 1) / blksize; | |
} | |
+static int | |
+cmp(const void *p1, const void *p2) | |
+{ | |
+ const struct file *f1 = p1, *f2 = p2; | |
+ | |
+ if (f1->devno > f2->devno) | |
+ return -1; | |
+ if (f1->devno < f2->devno) | |
+ return 1; | |
+ | |
+ /* f1->devno == f2->devno */ | |
+ if (f1->inode < f2->inode) | |
+ return -1; | |
+ if (f1->inode > f2->inode) | |
+ return 1; | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int | |
+duplicated(dev_t dev, ino_t ino) | |
+{ | |
+ static void *tree; | |
+ struct file **fpp, *fp, file = {dev, ino}; | |
+ | |
+ if ((fpp = tsearch(&file, &tree, cmp)) == NULL) | |
+ eprintf("%s:", argv0); | |
+ | |
+ if (*fpp != &file) | |
+ return 1; | |
+ | |
+ /* new file added */ | |
+ fp = emalloc(sizeof(*fp)); | |
+ *fp = file; | |
+ *fpp = fp; | |
+ | |
+ return 0; | |
+} | |
+ | |
static void | |
du(int dirfd, const char *path, struct stat *st, void *data, struct recursor *… | |
{ | |
off_t *total = data, subtotal; | |
subtotal = nblks(st->st_blocks); | |
- if (S_ISDIR(st->st_mode)) | |
+ if (S_ISDIR(st->st_mode)) { | |
recurse(dirfd, path, &subtotal, r); | |
+ } else if (r->follow != 'P' || st->st_nlink > 1) { | |
+ if (duplicated(st->st_dev, st->st_ino)) | |
+ goto print; | |
+ } | |
+ | |
*total += subtotal; | |
+print: | |
if (!r->depth) | |
printpath(*total, r->path); | |
else if (!sflag && r->depth <= maxdepth && (S_ISDIR(st->st_mode) || af… |