Skip to content

Commit 737edcf

Browse files
authored
Merge pull request #22 from gpcimino/feature/copy-if-newer
Add copy_if_newer parameter to functions in copy.py
2 parents 6579306 + 3b85c19 commit 737edcf

5 files changed

Lines changed: 505 additions & 11 deletions

File tree

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,8 @@ ENV/
8989

9090
# Rope project settings
9191
.ropeproject
92+
93+
94+
#PyCharm
95+
96+
.idea/

fs/copy.py

Lines changed: 192 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
from .path import combine
1414
from .path import frombase
1515
from .path import normpath
16+
from .errors import FSError
1617

1718

18-
def copy_fs(src_fs, dst_fs, walker=None):
19+
def copy_fs(src_fs, dst_fs, walker=None, on_copy=None):
1920
"""
2021
Copy the contents of one filesystem to another.
2122
@@ -29,17 +30,72 @@ def copy_fs(src_fs, dst_fs, walker=None):
2930
in ``src_fs``. Set this if you only want to consider a sub-set
3031
of the resources in ``src_fs``.
3132
:type walker: :class:`~fs.walk.Walker`
33+
:param on_copy: A function callback called after a single file copy is executed.
34+
:type on_copy: Function, with signature ``(src_fs, src_path, dst_fs, dst_path)``.
35+
"""
36+
return copy_dir(src_fs, '/', dst_fs, '/', walker=walker, on_copy=on_copy)
37+
38+
39+
def copy_fs_if_newer(src_fs, dst_fs, walker=None, on_copy=None):
    """
    Copy the whole of one filesystem to another, skipping up-to-date files.

    This delegates to ``copy_dir_if_newer`` with the root directory
    (``'/'``) as both the source and the destination path. A file that
    exists on both sides is copied only if the source is newer; when a
    modification time is unavailable the file is always copied.

    :param src_fs: Source filesystem.
    :type src_fs: FS URL or instance
    :param dst_fs: Destination filesystem.
    :type dst_fs: FS URL or instance
    :param walker: A walker object that will be used to scan for files
        in ``src_fs``. Set this if you only want to consider a sub-set
        of the resources in ``src_fs``.
    :type walker: :class:`~fs.walk.Walker`
    :param on_copy: A function callback called after a single file copy
        is executed.
    :type on_copy: Function, with signature
        ``(src_fs, src_path, dst_fs, dst_path)``.
    :returns: Whatever ``copy_dir_if_newer`` returns.
    """
    root = '/'
    return copy_dir_if_newer(
        src_fs,
        root,
        dst_fs,
        root,
        walker=walker,
        on_copy=on_copy,
    )
60+
3261

62+
def _source_is_newer(src_fs, src_path, dst_fs, dst_path):
    """
    Decide whether a source file should be treated as newer than its
    destination counterpart.

    :param src_fs: Source filesystem.
    :type src_fs: FS instance
    :param src_path: Path to a file on ``src_fs``.
    :type src_path: str
    :param dst_fs: Destination filesystem.
    :type dst_fs: FS instance
    :param dst_path: Path to a file on ``dst_fs``.
    :type dst_path: str
    :returns: True if the destination does not exist, if either
        modification time is unavailable, if an ``FSError`` is raised
        while querying, or if the source is strictly newer than the
        destination. False otherwise.
    """
    try:
        if not dst_fs.exists(dst_path):
            return True

        info_namespaces = ('details', 'modified')

        # The source is queried first; the destination is only queried
        # when the source timestamp is known.
        src_time = src_fs.getinfo(src_path, info_namespaces).modified
        if src_time is None:
            return True

        dst_time = dst_fs.getinfo(dst_path, info_namespaces).modified
        if dst_time is None:
            return True

        return src_time > dst_time
    except FSError:
        # TODO: log that the comparison failed and the file is copied
        # anyway as the safe default.
        return True
3694

3795
def copy_file(src_fs, src_path, dst_fs, dst_path):
3896
"""
3997
Copy a file from one filesystem to another.
40-
41-
If the destination exists, and is a file, it will be first
42-
truncated.
98+
If the destination exists, and is a file, it will be first truncated.
4399
44100
:param src_fs: Source filesystem.
45101
:type src_fs: FS URL or instance
@@ -49,19 +105,65 @@ def copy_file(src_fs, src_path, dst_fs, dst_path):
49105
:type dst_fs: FS URL or instance
50106
:param dst_path: Path to a file on ``dst_fs``.
51107
:type dst_path: str
108+
:returns: True if the file copy was executed, False otherwise.
52109
53110
"""
54111
with manage_fs(src_fs, writeable=False) as src_fs:
55112
with manage_fs(dst_fs, create=True) as dst_fs:
56113
if src_fs is dst_fs:
57114
# Same filesystem, so we can do a potentially optimized copy
58115
src_fs.copy(src_path, dst_path, overwrite=True)
116+
return True
59117
else:
60118
# Standard copy
61119
with src_fs.lock(), dst_fs.lock():
62120
with src_fs.open(src_path, 'rb') as read_file:
63121
# There may be an optimized copy available on dst_fs
64122
dst_fs.setbinfile(dst_path, read_file)
123+
return True
124+
125+
126+
127+
def copy_file_if_newer(src_fs, src_path, dst_fs, dst_path):
    """
    Copy a file from one filesystem to another, unless the destination
    is already up to date.

    The decision is made by ``_source_is_newer``: the copy is skipped
    only when that helper reports the source is not newer than the
    destination. When the copy happens, the destination file is
    overwritten.

    :param src_fs: Source filesystem.
    :type src_fs: FS URL or instance
    :param src_path: Path to a file on ``src_fs``.
    :type src_path: str
    :param dst_fs: Destination filesystem.
    :type dst_fs: FS URL or instance
    :param dst_path: Path to a file on ``dst_fs``.
    :type dst_path: str
    :returns: True if the file copy was executed, False otherwise.

    """
    with manage_fs(src_fs, writeable=False) as source:
        with manage_fs(dst_fs, create=True) as target:
            if source is target:
                # Same filesystem, so a potentially optimized copy is
                # available through FS.copy.
                if not _source_is_newer(source, src_path, target, dst_path):
                    return False
                source.copy(src_path, dst_path, overwrite=True)
                return True

            # Different filesystems: hold both locks while comparing
            # and streaming the data across.
            with source.lock(), target.lock():
                if not _source_is_newer(source, src_path, target, dst_path):
                    return False
                with source.open(src_path, 'rb') as read_file:
                    # There may be an optimized copy available on dst_fs
                    target.setbinfile(dst_path, read_file)
                return True
166+
65167

66168

67169
def copy_structure(src_fs, dst_fs, walker=None):
@@ -86,7 +188,7 @@ def copy_structure(src_fs, dst_fs, walker=None):
86188
dst_fs.makedir(dir_path, recreate=True)
87189

88190

89-
def copy_dir(src_fs, src_path, dst_fs, dst_path, walker=None):
191+
def copy_dir(src_fs, src_path, dst_fs, dst_path, walker=None, on_copy=None):
90192
"""
91193
Copy a directory from one filesystem to another.
92194
@@ -101,13 +203,14 @@ def copy_dir(src_fs, src_path, dst_fs, dst_path, walker=None):
101203
in ``src_fs``. Set this if you only want to consider a sub-set
102204
of the resources in ``src_fs``.
103205
:type walker: :class:`~fs.walk.Walker`
206+
:param on_copy: A function callback called after a single file copy is executed.
207+
:type on_copy: Function, with signature ``(src_fs, src_path, dst_fs, dst_path)``.
104208
105209
"""
106210

107211
walker = walker or Walker()
108212
_src_path = abspath(normpath(src_path))
109213
_dst_path = abspath(normpath(dst_path))
110-
111214
with manage_fs(src_fs, writeable=False) as src_fs:
112215
with manage_fs(dst_fs, create=True) as dst_fs:
113216
with src_fs.lock(), dst_fs.lock():
@@ -123,9 +226,90 @@ def copy_dir(src_fs, src_path, dst_fs, dst_path, walker=None):
123226
recreate=True
124227
)
125228
for info in files:
126-
copy_file(
229+
file_copied = copy_file(
127230
src_fs,
128231
info.make_path(dir_path),
129232
dst_fs,
130233
info.make_path(copy_path)
131234
)
235+
if file_copied:
236+
if on_copy is not None:
237+
on_copy(src_fs, dir_path, dst_fs, copy_path)
238+
239+
240+
def copy_dir_if_newer(src_fs, src_path, dst_fs, dst_path, walker=None, on_copy=None):
    """
    Copy a directory from one filesystem to another, skipping files
    whose destination copy is already up to date.

    A file is copied when it does not yet exist at its destination
    path, when the modification time of either side is unavailable, or
    when the source is strictly newer than the destination.

    :param src_fs: Source filesystem.
    :type src_fs: FS URL or instance
    :param src_path: A path to a directory on ``src_fs``.
    :type src_path: str
    :param dst_fs: Destination filesystem.
    :type dst_fs: FS URL or instance
    :param str dst_path: A path to a directory on ``dst_fs``.
    :param walker: A walker object that will be used to scan for files
        in ``src_fs``. Set this if you only want to consider a sub-set
        of the resources in ``src_fs``.
    :type walker: :class:`~fs.walk.Walker`
    :param on_copy: A function callback called after a single file copy
        is executed.
    :type on_copy: Function, with signature
        ``(src_fs, src_path, dst_fs, dst_path)``.

    """
    walker = walker or Walker()
    _src_path = abspath(normpath(src_path))
    _dst_path = abspath(normpath(dst_path))
    with manage_fs(src_fs, writeable=False) as src_fs:
        with manage_fs(dst_fs, create=True) as dst_fs:
            with src_fs.lock(), dst_fs.lock():
                dst_fs.makedir(_dst_path, recreate=True)
                namespace = ('details', 'modified')
                # Snapshot of the files currently under the destination
                # directory, keyed by their path on dst_fs.
                dst_state = {
                    filepath: info
                    for filepath, info in walker.info(dst_fs, _dst_path, namespace)
                    if info.is_file
                }
                # The source listing is materialised into a list before
                # anything is written.
                src_state = list(walker.info(src_fs, _src_path, namespace))
                for src_item_path, src_info in src_state:
                    copy_path = combine(
                        _dst_path,
                        frombase(_src_path, src_item_path)
                    )
                    if src_info.is_dir:
                        dst_fs.makedir(copy_path, recreate=True)
                    elif src_info.is_file:
                        # Look the file up by its *destination* path: the
                        # snapshot is keyed by dst_fs paths, which differ
                        # from source paths whenever src_path != dst_path.
                        dst_info = dst_state.get(copy_path)
                        if dst_info is None:
                            # dst file is not present
                            do_copy = True
                        else:
                            src_modified = src_info.modified
                            dst_modified = dst_info.modified
                            # Short-circuit on a missing timestamp so that
                            # None is never ordered against a datetime.
                            do_copy = (
                                src_modified is None or
                                dst_modified is None or
                                src_modified > dst_modified
                            )
                        if do_copy:
                            file_copied = copy_file(
                                src_fs,
                                src_item_path,
                                dst_fs,
                                copy_path
                            )
                            if file_copied and on_copy is not None:
                                on_copy(src_fs, src_item_path, dst_fs, copy_path)
313+
314+
315+

fs/ftpfs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ def __str__(self):
322322

323323
def _open_ftp(self):
324324
_ftp = FTP()
325-
#_ftp.set_debuglevel(2)
325+
_ftp.set_debuglevel(0)
326326
with ftp_errors(self):
327327
_ftp.connect(self.host, self.port, self.timeout)
328328
_ftp.login(self.user, self.passwd, self.acct)

fs/osfs.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from .permissions import Permissions
2828
from .error_tools import convert_os_errors
2929
from .mode import Mode, validate_open_mode
30-
30+
from .errors import NoURL
3131

3232
log = logging.getLogger('fs.osfs')
3333

@@ -305,6 +305,11 @@ def getsyspath(self, path):
305305
sys_path = self._to_sys_path(path)
306306
return sys_path
307307

308+
def geturl(self, path, purpose='download'):
    """
    Return a ``file://`` URL for a resource on this filesystem.

    :param path: Path to a resource, passed on to ``getsyspath``.
    :type path: str
    :param purpose: Intended use of the URL; only ``'download'`` is
        supported.
    :type purpose: str
    :returns: ``"file://"`` followed by the system path of ``path``.
    :raises NoURL: If ``purpose`` is anything other than ``'download'``.
    """
    if purpose == 'download':
        return "file://" + self.getsyspath(path)
    raise NoURL(path, purpose)
312+
308313
def gettype(self, path):
309314
self.check()
310315
sys_path = self._to_sys_path(path)

0 commit comments

Comments
 (0)