From eafa6ebf10924ec36cb5d28fd9d97a23de089bd0 Mon Sep 17 00:00:00 2001 From: Pádraig Brady Date: Mon, 6 Oct 2014 10:19:58 +0100 Subject: cp: avoid speculative preallocation with --sparse=always With --sparse=always use fallocate(...PUNCH_HOLE...) to avoid any permanent allocation due to speculative preallocation employed by file systems such as XFS. * m4/jm-macros.m4: Check for <linux/falloc.h> and fallocate(). * src/copy.c (punch_hole): A new function to try and punch a hole at the specified offset if supported. (create_hole): Call punch_hole() after requesting a hole. (extent_copy): Likewise. * NEWS: Mention the improvement. --- NEWS | 2 +- m4/jm-macros.m4 | 2 ++ src/copy.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 65 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index 700707039..e7aef7725 100644 --- a/NEWS +++ b/NEWS @@ -34,7 +34,7 @@ GNU coreutils NEWS -*- outline -*- ** Improvements cp,install,mv will convert smaller runs of NULs in the input to holes, - to reduce allocation in the copy. + and cp --sparse=always avoids speculative preallocation on XFS for example. mv will try a reflink before falling back to a standard copy, which is more efficient when moving files across BTRFS subvolume boundaries. 
diff --git a/m4/jm-macros.m4 b/m4/jm-macros.m4 index a96ecabb6..07b90850c 100644 --- a/m4/jm-macros.m4 +++ b/m4/jm-macros.m4 @@ -78,6 +78,7 @@ AC_DEFUN([coreutils_MACROS], AC_CHECK_FUNCS_ONCE([ endgrent endpwent + fallocate fchown fchmod ftruncate @@ -189,6 +190,7 @@ AC_DEFUN([gl_CHECK_ALL_HEADERS], [ AC_CHECK_HEADERS_ONCE([ hurd.h + linux/falloc.h paths.h priv.h stropts.h diff --git a/src/copy.c b/src/copy.c index 24b8af3e6..85a4c5965 100644 --- a/src/copy.c +++ b/src/copy.c @@ -70,6 +70,10 @@ # include "verror.h" #endif +#if HAVE_LINUX_FALLOC_H +# include <linux/falloc.h> +#endif + #ifndef HAVE_FCHOWN # define HAVE_FCHOWN false # define fchown(fd, uid, gid) (-1) @@ -145,20 +149,54 @@ utimens_symlink (char const *file, struct timespec const *timespec) return err; } -/* Create a hole at the end of a file. */ +/* Attempt to punch a hole to avoid any permanent + speculative preallocation on file systems such as XFS. + Return values as per fallocate(2) except ENOSYS etc. are ignored. */ + +static int +punch_hole (int fd, off_t offset, off_t length) +{ + int ret = 0; +#if HAVE_FALLOCATE +# if defined FALLOC_FL_PUNCH_HOLE && defined FALLOC_FL_KEEP_SIZE + ret = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset, length); + if (ret < 0 + && (errno == EOPNOTSUPP || errno == ENOTSUP || errno == ENOSYS)) + ret = 0; +# endif +#endif + return ret; +} + +/* Create a hole at the end of a file, + avoiding preallocation if requested. */ static bool -create_hole (int fd, char const *name, off_t size) +create_hole (int fd, char const *name, bool punch_holes, off_t size) { - if (lseek (fd, size, SEEK_CUR) < 0) + off_t file_end = lseek (fd, size, SEEK_CUR); + + if (file_end < 0) { error (0, errno, _("cannot lseek %s"), quote (name)); return false; } + /* Some file systems (like XFS) preallocate when write extending a file. + I.E. a previous write() may have preallocated extra space + that the seek above will not discard. 
A subsequent write() could + then make this allocation permanent. */ + if (punch_holes && punch_hole (fd, file_end - size, size) < 0) + { + error (0, errno, _("error deallocating %s"), quote (name)); + return false; + } + return true; } + /* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME, honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer BUF for temporary storage. Copy no more than MAX_N_READ bytes. @@ -172,7 +210,7 @@ create_hole (int fd, char const *name, off_t size) bytes read. */ static bool sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, - size_t hole_size, bool make_holes, + size_t hole_size, bool punch_holes, char const *src_name, char const *dst_name, uintmax_t max_n_read, off_t *total_n_read, bool *last_write_made_hole) @@ -198,7 +236,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, *total_n_read += n_read; /* Loop over the input buffer in chunks of hole_size. */ - size_t csize = make_holes ? hole_size : buf_size; + size_t csize = hole_size ? hole_size : buf_size; char *cbuf = buf; char *pbuf = buf; @@ -207,7 +245,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, bool prev_hole = make_hole; csize = MIN (csize, n_read); - if (make_holes && csize) + if (hole_size && csize) { /* Setup sentinel required by is_nul(). */ typedef uintptr_t word; @@ -238,7 +276,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, } else { - if (! create_hole (dest_fd, dst_name, psize)) + if (! create_hole (dest_fd, dst_name, punch_holes, psize)) return false; } @@ -281,7 +319,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, /* Ensure a trailing hole is created, so that subsequent calls of sparse_copy() start at the correct offset. */ - if (make_hole && ! create_hole (dest_fd, dst_name, psize)) + if (make_hole && ! 
create_hole (dest_fd, dst_name, punch_holes, psize)) return false; else return true; @@ -421,7 +459,9 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, if ((empty_extent && sparse_mode == SPARSE_ALWAYS) || (!empty_extent && sparse_mode != SPARSE_NEVER)) { - if (! create_hole (dest_fd, dst_name, ext_hole_size)) + if (! create_hole (dest_fd, dst_name, + sparse_mode == SPARSE_ALWAYS, + ext_hole_size)) goto fail; wrote_hole_at_eof = true; } @@ -465,9 +505,9 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, empty_extent = false; last_ext_len = ext_len; - if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size, hole_size, - sparse_mode == SPARSE_ALWAYS, - src_name, dst_name, ext_len, &n_read, + if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size, + sparse_mode == SPARSE_ALWAYS ? hole_size: 0, + true, src_name, dst_name, ext_len, &n_read, &wrote_hole_at_eof)) goto fail; @@ -509,6 +549,13 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size, return false; } + if (sparse_mode == SPARSE_ALWAYS && dest_pos < src_total_size + && punch_hole (dest_fd, dest_pos, src_total_size - dest_pos) < 0) + { + error (0, errno, _("error deallocating %s"), quote (dst_name)); + return false; + } + return true; } @@ -1236,8 +1283,9 @@ copy_reg (char const *src_name, char const *dst_name, off_t n_read; bool wrote_hole_at_eof; - if ( ! sparse_copy (source_desc, dest_desc, buf, buf_size, hole_size, - make_holes, src_name, dst_name, + if ( ! sparse_copy (source_desc, dest_desc, buf, buf_size, + make_holes ? hole_size : 0, + x->sparse_mode == SPARSE_ALWAYS, src_name, dst_name, UINTMAX_MAX, &n_read, &wrote_hole_at_eof) || (wrote_hole_at_eof -- cgit v1.2.3-54-g00ecf