From f2394ab694c0bd4b9c3913ea79ba6937775794a5 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Thu, 4 Sep 2025 01:18:33 +0200 Subject: [PATCH] md5: support all OpenSSL EVP digests Add option "-c digest" to use any of the supported EVP message digests in place of insecure MD5. This generates checksum files named by the digest by default, e.g. .SHA256.CHECKSUMS For backwards compatibility -m translates to -c md5 The "-M filename" option to rename the checksums file remains untouched --- cpdup.1 | 43 +++++++++++++++++++--------- src/cpdup.c | 81 ++++++++++++++++++++++++++++++++++++++++++----------- src/cpdup.h | 8 ++++-- src/md5.c | 41 +++++++++++++-------------- src/misc.c | 16 +++++++---- 5 files changed, 131 insertions(+), 58 deletions(-) diff --git a/cpdup.1 b/cpdup.1 index 7c80624..0b48f41 100644 --- a/cpdup.1 +++ b/cpdup.1 @@ -12,6 +12,7 @@ .Nm .Op Fl C .Op Fl v Ns Op Cm v Ns Op Cm v +.Op Fl c Ar digest .Op Fl d .Op Fl n .Op Fl u @@ -73,6 +74,27 @@ modifications made to the destination. .Fl vvv will cause all files and directories to be reported whether or not modifications are made. +.It Fl c Ar digest +Generate and maintain a checksum file using the specified message +.Ar digest +called +.Pa \&.DIGEST.CHECKSUMS +in each directory on the source where +.Pa DIGEST +is replaced by the uppercase name of the message digest used. +An alternate file name may be specified with the +.Fl M Ar file +option. Supported are all locally available OpenSSL +.Xr EVP 7 +message digests, e.g. md5, rmd160, sha1, sha256 or sha512. +A checksum check is done on each file of the destination when the destination +appears to be the same as the source. If the check fails, +the source is recopied to the destination. When you specify a destination +directory, the checksum file is only updated as needed and may not be updated +even if modifications are made to a source file. If you do not specify a +destination directory the +.Nm +command forcefully regenerates the checksum for every file in the source.
.It Fl d Print directories as they are being traversed. Useful to watch the progress; @@ -120,21 +142,14 @@ Quiet operation. .It Fl o Do not remove any files, just overwrite/add. .It Fl m -Generate and maintain a MD5 checkfile called -.Pa \&.MD5.CHECKSUMS -in each directory on the source -and do an MD5 check on each file of the destination when the destination -appears to be the same as the source. If the check fails, -the source is recopied to the destination. When you specify a destination -directory, the MD5 checkfile is only updated as needed and may not be updated -even if modifications are made to a source file. If you do not specify a -destination directory the -.Nm -command forcefully regenerates the MD5 checkfile for every file in the source. -.It Fl M Ar file Works the same as -.Fl m -but allows you to specify the name of the MD5 checkfile. +.Fl c Ar md5 +for compatibility purposes. +.It Fl M Ar file +Allows you to specify the name of the checksum file generated by options +.Fl c +or +.Fl m . .It Fl H Ar path .Nm will create a hardlink from a file found under diff --git a/src/cpdup.c b/src/cpdup.c index 7d1120a..b685017 100644 --- a/src/cpdup.c +++ b/src/cpdup.c @@ -59,7 +59,8 @@ * standard wildcarded ( ? / * style, NOT regex) exclusions. * - tries to play permissions and flags smart in regards to overwriting * schg files and doing related stuff. - * - Can do MD5 consistancy checks + * - Can do checksum consistency checks with any supported OpenSSL EVP + * message digest * - Is able to do incremental mirroring/backups via hardlinks from * the 'previous' version (supplied with -H path).
*/ @@ -67,6 +68,7 @@ #include "cpdup.h" #include "hclink.h" #include "hcproto.h" +#include #define HSIZE 8192 #define HMASK (HSIZE-1) @@ -188,10 +190,17 @@ int64_t CountLinkedItems; static struct HostConf SrcHost; static struct HostConf DstHost; +#ifndef NOMD5 +const EVP_MD *CsumAlgo; +#endif + int main(int ac, char **av) { int i; +#ifndef NOMD5 + int len; +#endif int opt; char *src = NULL; char *dst = NULL; @@ -199,12 +208,25 @@ main(int ac, char **av) struct timeval start; struct copy_info info; +#ifndef NOMD5 + char *CsumAlgoStr; + char *MD5CacheFileStr; + const char *CsumAlgoStrArg = NULL; + const char *MD5CacheFileArg = NULL; +#endif + signal(SIGPIPE, SIG_IGN); gettimeofday(&start, NULL); opterr = 0; - while ((opt = getopt(ac, av, ":CdF:fH:hIi:j:lM:mnoqRSs:uVvX:x")) != -1) { + while ((opt = getopt(ac, av, ":c:CdF:fH:hIi:j:lM:mnoqRSs:uVvX:x")) != -1) { switch (opt) { + case 'c': + UseMD5Opt = 1; +#ifndef NOMD5 + CsumAlgoStrArg = optarg; +#endif + break; case 'C': CompressOpt = 1; break; @@ -241,11 +263,17 @@ main(int ac, char **av) break; case 'M': UseMD5Opt = 1; - MD5CacheFile = optarg; + if (strnlen(optarg, PATH_MAX) == PATH_MAX) + fatal("Cache file string too long"); +#ifndef NOMD5 + MD5CacheFileArg = optarg; +#endif break; case 'm': UseMD5Opt = 1; - MD5CacheFile = ".MD5.CHECKSUMS"; +#ifndef NOMD5 + CsumAlgoStrArg = "MD5"; +#endif break; case 'n': NotForRealOpt = 1; @@ -303,6 +331,27 @@ main(int ac, char **av) if (ac > 2) fatal("too many arguments"); +#ifndef NOMD5 + if (UseMD5Opt) { + if (CsumAlgoStrArg == NULL) + CsumAlgoStrArg = "MD5"; + CsumAlgo = EVP_get_digestbyname(CsumAlgoStrArg); + if (CsumAlgo == NULL) + fatal("Unknown digest algorithm: %s", CsumAlgoStrArg); + len = strlen(CsumAlgoStrArg); + CsumAlgoStr = malloc(len + 1); + for (i = 0; i < len; i++) + CsumAlgoStr[i] = toupper(CsumAlgoStrArg[i]); + CsumAlgoStr[i] = '\0'; + if (MD5CacheFileArg == NULL) { + if (asprintf(&MD5CacheFileStr, ".%s.CHECKSUMS", CsumAlgoStr) < 0) + fatal("Memory allocation 
error\n"); + } else + MD5CacheFileStr = strdup(MD5CacheFileArg); + MD5CacheFile = MD5CacheFileStr; + } +#endif + /* * If we are told to go into slave mode, run the HC protocol */ @@ -320,7 +369,7 @@ main(int ac, char **av) SrcHost.host = src; src = ptr; if (UseMD5Opt) - fatal("The MD5 options are not currently supported for remote sources"); + fatal("The checksum options are not currently supported for remote sources"); if (hc_connect(&SrcHost, ReadOnlyOpt) < 0) exit(1); } else { @@ -339,8 +388,8 @@ main(int ac, char **av) } /* - * dst may be NULL only if -m option is specified, - * which forces an update of the MD5 checksums + * dst may be NULL only if -c checksum or -m option is specified, + * which forces an update of the checksums */ if (dst == NULL && UseMD5Opt == 0) { fatal(NULL); @@ -830,7 +879,7 @@ DoCopy(copy_info_t info, struct stat *stat1, int depth) OwnerMatch(stat1, &st2) #ifndef NOMD5 && (UseMD5Opt == 0 || !S_ISREG(stat1->st_mode) || - (mres = md5_check(spath, dpath)) == 0) + (mres = md5_check(CsumAlgo, spath, dpath)) == 0) #endif && (ValidateOpt == 0 || !S_ISREG(stat1->st_mode) || validate_check(spath, dpath) == 0) @@ -858,7 +907,7 @@ DoCopy(copy_info_t info, struct stat *stat1, int depth) if (VerboseOpt >= 3) { #ifndef NOMD5 if (UseMD5Opt) { - logstd("%-32s md5-nochange", + logstd("%-32s checksum-nochange", (dpath ? 
dpath : spath)); } else #endif @@ -1063,22 +1112,22 @@ DoCopy(copy_info_t info, struct stat *stat1, int depth) } } else if (dpath == NULL) { /* - * If dpath is NULL, we are just updating the MD5 + * If dpath is NULL, we are just updating the checksum */ #ifndef NOMD5 if (UseMD5Opt && S_ISREG(stat1->st_mode)) { - mres = md5_update(spath); + mres = md5_update(CsumAlgo, spath); if (mres < 0) { - logerr("%-32s md5-CHECK-FAILED\n", spath); + logerr("%-32s checksum-CHECK-FAILED\n", spath); } else { if (VerboseOpt > 1) { if (mres > 0) - logstd("%-32s md5-update\n", spath); + logstd("%-32s checksum-update\n", spath); else - logstd("%-32s md5-ok\n", spath); + logstd("%-32s checksum-ok\n", spath); } else if (!QuietOpt && mres > 0) { - logstd("%-32s md5-update\n", spath); + logstd("%-32s checksum-update\n", spath); } } } @@ -1099,7 +1148,7 @@ DoCopy(copy_info_t info, struct stat *stat1, int depth) * Handle check failure message. */ if (mres < 0) - logerr("%-32s md5-CHECK-FAILED\n", (dpath) ? dpath : spath); + logerr("%-32s checksum-CHECK-FAILED\n", (dpath) ? 
dpath : spath); #endif /* diff --git a/src/cpdup.h b/src/cpdup.h index 68e2435..1a7e4fe 100644 --- a/src/cpdup.h +++ b/src/cpdup.h @@ -52,6 +52,10 @@ #include #include +#ifndef NOMD5 +#include +#endif + #ifdef __linux /* @@ -100,8 +104,8 @@ int32_t hc_bswap32(int32_t var); int64_t hc_bswap64(int64_t var); #ifndef NOMD5 -int md5_update(const char *spath); -int md5_check(const char *spath, const char *dpath); +int md5_update(const EVP_MD *algo, const char *spath); +int md5_check(const EVP_MD *algo, const char *spath, const char *dpath); void md5_flush(void); #endif diff --git a/src/md5.c b/src/md5.c index 710f1e3..e71f230 100644 --- a/src/md5.c +++ b/src/md5.c @@ -33,8 +33,6 @@ #include "cpdup.h" -#include - typedef struct MD5Node { struct MD5Node *md_Next; char *md_Name; @@ -45,7 +43,8 @@ typedef struct MD5Node { static MD5Node *md5_lookup(const char *spath); static void md5_cache(const char *spath, int sdirlen); static void md5_load(FILE *fi); -static int md5_file(const char *filename, char *buf, int is_target); +static int md5_file(const EVP_MD *algo, const char *filename, char *buf, + int is_target); static char *MD5SCache; /* cache source directory name */ static MD5Node *MD5Base; @@ -68,7 +67,7 @@ md5_flush(void) } fclose(fo); } else { - logerr("Error writing MD5 Cache (%s): %s\n", + logerr("Error writing checksum cache (%s): %s\n", MD5SCache, strerror(errno)); } } @@ -119,13 +118,13 @@ md5_cache(const char *spath, int sdirlen) md5_load(fi); fclose(fi); } else if (errno != ENOENT) { - logerr("Error reading MD5 Cache (%s): %s\n", + logerr("Error reading checksum cache (%s): %s\n", MD5SCache, strerror(errno)); } } /* - * md5_lookup: lookup/create md5 entry + * md5_lookup: lookup/create checksum entry */ static MD5Node * md5_lookup(const char *spath) @@ -160,14 +159,14 @@ md5_lookup(const char *spath) } /* - * md5_update: force update the source MD5 file. + * md5_update: force update the source checksum file. 
* * Return -1 if failed * Return 0 if up-to-date * Return 1 if updated */ int -md5_update(const char *spath) +md5_update(const EVP_MD *algo, const char *spath) { char scode[EVP_MAX_MD_SIZE * 2 + 1]; int r; @@ -175,7 +174,7 @@ md5_update(const char *spath) node = md5_lookup(spath); - if (md5_file(spath, scode, 0 /* is_target */) == 0) { + if (md5_file(algo, spath, scode, 0 /* is_target */) == 0) { r = 0; if (strcmp(scode, node->md_Code) != 0) { r = 1; @@ -190,14 +189,14 @@ md5_update(const char *spath) } /* - * md5_check: check MD5 against file + * md5_check: check checksum against file * * Return -1 if check failed * Return 0 if source and dest files are identical * Return 1 if source and dest files are not identical */ int -md5_check(const char *spath, const char *dpath) +md5_check(const EVP_MD *algo, const char *spath, const char *dpath) { char scode[EVP_MAX_MD_SIZE * 2 + 1]; char dcode[EVP_MAX_MD_SIZE * 2 + 1]; @@ -207,16 +206,16 @@ md5_check(const char *spath, const char *dpath) node = md5_lookup(spath); /* - * The .MD5* file is used as a cache. + * The .[checksum]* file is used as a cache. */ - if (md5_file(dpath, dcode, 1 /* is_target */) == 0) { + if (md5_file(algo, dpath, dcode, 1 /* is_target */) == 0) { r = 0; if (strcmp(node->md_Code, dcode) != 0) { r = 1; /* * Update the source digest code and recheck. */ - if (md5_file(spath, scode, 0 /* is_target */) == 0) { + if (md5_file(algo, spath, scode, 0 /* is_target */) == 0) { if (strcmp(node->md_Code, scode) != 0) { memcpy(node->md_Code, scode, sizeof(scode)); MD5SCacheDirty = 1; @@ -239,7 +238,7 @@ md5_check(const char *spath, const char *dpath) * >= (EVP_MAX_MD_SIZE * 2 + 1). 
*/ static int -md5_file(const char *filename, char *buf, int is_target) +md5_file(const EVP_MD *algo, const char *filename, char *buf, int is_target) { static const char hex[] = "0123456789abcdef"; unsigned char digest[EVP_MAX_MD_SIZE]; @@ -264,7 +263,7 @@ md5_file(const char *filename, char *buf, int is_target) #endif if (ctx == NULL) goto err; - if (!EVP_DigestInit_ex(ctx, EVP_md5(), NULL)) + if (!EVP_DigestInit_ex(ctx, algo, NULL)) goto err; size = st.st_size; @@ -365,7 +364,7 @@ md5_load(FILE *fi) c = get_field(fi, c, node->md_Code, sizeof(node->md_Code)); if (c != ' ') { - logerr("Error parsing MD5 Cache (%s): invalid digest code (%c)\n", + logerr("Error parsing checksum cache (%s): invalid digest code (%c)\n", MD5SCache, c); goto next; } @@ -373,13 +372,13 @@ md5_load(FILE *fi) c = fgetc(fi); c = get_field(fi, c, nbuf, sizeof(nbuf)); if (c != ' ') { - logerr("Error parsing MD5 Cache (%s): invalid length (%c)\n", + logerr("Error parsing checksum cache (%s): invalid length (%c)\n", MD5SCache, c); goto next; } nlen = (int)strtol(nbuf, &endp, 10); if (*endp != '\0' || nlen == 0) { - logerr("Error parsing MD5 Cache (%s): invalid length (%s)\n", + logerr("Error parsing checksum cache (%s): invalid length (%s)\n", MD5SCache, nbuf); goto next; } @@ -390,7 +389,7 @@ md5_load(FILE *fi) for (n = 0; n < nlen; n++) { c = fgetc(fi); if (c == EOF) { - logerr("Error parsing MD5 Cache (%s): invalid filename\n", + logerr("Error parsing checksum cache (%s): invalid filename\n", MD5SCache); goto next; } @@ -399,7 +398,7 @@ md5_load(FILE *fi) c = fgetc(fi); if (c != '\n' && c != EOF) { - logerr("Error parsing MD5 Cache (%s): trailing garbage (%c)\n", + logerr("Error parsing checksum cache (%s): trailing garbage (%c)\n", MD5SCache, c); while (c != EOF && c != '\n') c = fgetc(fi); diff --git a/src/misc.c b/src/misc.c index 0bfa0dd..f572eb0 100644 --- a/src/misc.c +++ b/src/misc.c @@ -160,6 +160,13 @@ fatal(const char *ctl, ...) 
puts("usage: cpdup [options] src dest"); puts("\n" "options:\n" +#ifndef NOMD5 + " -c digest maintain/generate checksum file on source,\n" + " and compare with (optional) destination,\n" + " copying if compare fails.\n" + " Digest may be any message digest supported by\n" + " OpenSSL EVP e.g. md5, sha1, sha256, sha512\n" +#endif " -C request compressed ssh link if remote operation\n" " -d print directories being traversed\n" " -f force update even if files look the same\n" @@ -172,11 +179,10 @@ fatal(const char *ctl, ...) " -l force line-buffered stdout/stderr" ); #ifndef NOMD5 - puts(" -m maintain/generate MD5 checkfile on source,\n" - " and compare with (optional) destination,\n" - " copying if the compare fails\n" - " -M file -m+specify MD5 checkfile, else .MD5_CHECKSUMS\n" - " copy if md5 check fails" + puts(" -m equals to -c md5 for compatibility purposes\n" + " -M file checksum file name, default .[DIGEST].CHECKSUMS\n" + " where DIGEST is the name of the message digest used.\n" + " Copy if checksum check fails" ); #endif puts(" -n do not make any real changes to the target\n"