我写了一个向叠瓦盘复制数据的Python程序,确保不爆盘
我淘了一个二手WD HC620的叠瓦盘,挂在飞牛上共享当数据盘,结果用FastCopy复制了2T的程序就爆盘了,足足等了一天才恢复正常。
我用Python写了一个复制程序,可以指定每上传15GB,就暂停5分钟,这样就不会爆盘了,我把代码共享给大家。
希望大家一起研究一下,一次上传多少量,休息多久比较能兼顾效率和安全?
import os
import sys
import shutil
import time
from pathlib import Path
class Tee:
    """Write-only text stream that fans output out to several streams.

    Every non-blank line is prefixed with the current local time
    (second resolution). The prefix is only emitted at the start of a
    line, so a line assembled from several ``write`` calls is stamped once.
    """

    def __init__(self, *files):
        """Store the target streams; each must provide ``write`` and ``flush``."""
        self.files = files
        # True while the next character written begins a new line.
        self.at_line_start = True

    def write(self, obj):
        """Write ``obj`` to every stream and return the number of characters.

        Returning the length follows the usual text-stream ``write``
        contract, so callers that check the return value keep working
        while this object is installed as ``sys.stdout``.
        """
        # One timestamp per call keeps all lines of a single print() aligned.
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
        for line in obj.splitlines(keepends=True):
            # Stamp only real content at the start of a line; bare newlines
            # (blank separator lines) stay unstamped.
            if self.at_line_start and line.strip():
                prefix = f"[{timestamp}] "
            else:
                prefix = ""
            for f in self.files:
                f.write(prefix + line)
            # A trailing newline means the next chunk starts a fresh line.
            self.at_line_start = line.endswith('\n')
        self.flush()
        return len(obj)

    def flush(self):
        """Flush every underlying stream."""
        for f in self.files:
            f.flush()
class FileCopier:
    """Single-threaded file copier that writes in size-limited batches.

    After at least ``limit_gb`` of data has been copied, the copier sleeps
    for ``pause_seconds`` before continuing. File metadata is preserved via
    ``shutil.copy2``. Files whose size and modification time already match
    at the destination are skipped, so an interrupted run can be resumed.

    NOTE: the limit is only checked after each complete file, so a single
    file larger than the limit is still written in one go.
    """

    # Modification times closer than this are treated as equal, because the
    # destination filesystem may store timestamps at a coarser resolution
    # than the source (FAT-family filesystems round to 2 seconds).
    MTIME_TOLERANCE_SECONDS = 2.0

    def __init__(self, source: str, target: str, limit_gb: float, pause_seconds: int):
        """Configure the copier and open a timestamped log file.

        :param source: file or directory to copy from.
        :param target: destination directory.
        :param limit_gb: batch size in GiB; ``0`` or less disables throttling.
        :param pause_seconds: seconds to sleep after each full batch.
        """
        self.source = Path(source).resolve()
        self.target = Path(target).resolve()
        self.limit_bytes = int(limit_gb * 1024 ** 3) if limit_gb > 0 else 0
        self.pause_seconds = pause_seconds
        self.file_list = []
        # The log is created in the current working directory.
        log_name = f"飞牛叠瓦盘复制{time.strftime('%Y%m%d_%H%M%S')}.log"
        self.log_file = open(log_name, 'w', encoding='utf-8')

    def _collect_files(self):
        """Rebuild ``self.file_list`` as sorted ``(source, destination)`` pairs.

        :raises FileNotFoundError: if the source is neither a file nor a directory.
        """
        # Start from an empty list so repeated calls do not duplicate entries.
        pairs = []
        if self.source.is_file():
            pairs.append((self.source, self.target / self.source.name))
        elif self.source.is_dir():
            for root, _, files in os.walk(self.source):
                src_dir = Path(root)
                dest_dir = self.target / src_dir.relative_to(self.source)
                pairs.extend((src_dir / name, dest_dir / name) for name in files)
        else:
            raise FileNotFoundError(f"源路径不存在: {self.source}")
        # Sort by source path so the copy order is stable between runs.
        pairs.sort(key=lambda pair: str(pair[0]))
        self.file_list = pairs

    def _need_copy(self, src: Path, dst: Path) -> bool:
        """Return True when ``dst`` is missing or differs from ``src``.

        Files are considered identical when their sizes match and their
        modification times differ by at most ``MTIME_TOLERANCE_SECONDS``.
        """
        # One stat per file: each call is a round trip on a network share.
        try:
            dst_stat = dst.stat()
        except (FileNotFoundError, NotADirectoryError):
            return True
        src_stat = src.stat()
        if src_stat.st_size != dst_stat.st_size:
            return True
        drift = abs(src_stat.st_mtime - dst_stat.st_mtime)
        return drift > self.MTIME_TOLERANCE_SECONDS

    def _copy_file(self, src: Path, dst: Path) -> int:
        """Copy one file with its metadata and return its size in bytes."""
        file_size = src.stat().st_size
        print(f'当前文件大小:{file_size/ (1024**2):.2f}MB')
        dst.parent.mkdir(parents=True, exist_ok=True)
        # copy2 also copies timestamps, which _need_copy relies on later.
        shutil.copy2(src, dst)
        return file_size

    def _process_files(self):
        """Collect the file list, copy it in throttled batches, print a summary."""
        gib = 1024 ** 3
        mib = 1024 ** 2

        print("正在收集文件列表...")
        self._collect_files()
        total_files = len(self.file_list)
        if total_files == 0:
            print("没有需要复制的文件。")
            return
        print(f"共发现 {total_files} 个文件需要处理")
        if self.limit_bytes > 0:
            print(f"数据量限制: {self.limit_bytes / (1024**3):.2f} GB,超限后暂停 {self.pause_seconds} 秒")
        else:
            print("未设置数据量限制")

        current_batch_bytes = 0
        total_bytes_copied = 0
        copied_count = 0
        skipped_count = 0
        failed_count = 0
        start_time = time.time()
        batch_start_time = start_time
        batch_copied = 0
        batch_skipped = 0
        batch_failed = 0
        batch_count = 0

        for index, (src, dst) in enumerate(self.file_list, start=1):
            # Only the per-file work is guarded: a file that cannot be
            # inspected or copied is logged and the run carries on.
            try:
                if not self._need_copy(src, dst):
                    print(f"跳过(文件一致): {src} -> {dst}")
                    skipped_count += 1
                    batch_skipped += 1
                    continue
                print(f"复制: {src} -> {dst}")
                file_size = self._copy_file(src, dst)
            except Exception as e:
                print(f"复制失败: {src} -> {dst}, 错误: {e}")
                failed_count += 1
                batch_failed += 1
                continue

            current_batch_bytes += file_size
            total_bytes_copied += file_size
            copied_count += 1
            batch_copied += 1
            print(f" 完成,当前批次完成量 {current_batch_bytes / gib:.2f} GB,总计已复制 {total_bytes_copied / gib:.2f} GB")

            if self.limit_bytes <= 0 or current_batch_bytes < self.limit_bytes:
                continue

            # The batch is full: report it, then let the disk catch up.
            batch_count += 1
            more_files_remain = index < total_files
            batch_elapsed = time.time() - batch_start_time
            speed = current_batch_bytes / batch_elapsed if batch_elapsed > 0 else 0
            if more_files_remain:
                print(f"已达到数据量限制 ({current_batch_bytes / gib:.2f} GB),暂停 {self.pause_seconds} 秒...")
            print(f" [本批次小结] 成功: {batch_copied} 个, 跳过: {batch_skipped} 个, 失败: {batch_failed} 个")
            print(f" 复制数据: {current_batch_bytes / gib:.2f} GB, 耗时: {batch_elapsed:.1f} 秒, 速度: {speed / mib:.2f} MB/s")
            total_elapsed = time.time() - start_time
            total_speed = total_bytes_copied / total_elapsed if total_elapsed > 0 else 0
            print(f" [累计统计] 成功: {copied_count} 个, 跳过: {skipped_count} 个, 失败: {failed_count} 个")
            print(f" 累计复制: {total_bytes_copied / gib:.2f} GB, 总耗时: {total_elapsed:.1f} 秒, 整体速度: {total_speed / mib:.2f} MB/s")
            # Pausing after the final file would only delay the summary.
            if more_files_remain and self.pause_seconds > 0:
                time.sleep(self.pause_seconds)
            current_batch_bytes = 0
            batch_start_time = time.time()
            batch_copied = 0
            batch_skipped = 0
            batch_failed = 0

        # Count the trailing batch that never reached the limit.
        if batch_copied + batch_skipped + batch_failed > 0:
            batch_count += 1

        end_time = time.time()
        total_duration = end_time - start_time
        avg_speed = total_bytes_copied / total_duration if total_duration > 0 else 0
        print("\n========== 所有文件处理完毕! ==========")
        print(f"开始时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))}")
        print(f"结束时间: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))}")
        print(f"总耗时: {total_duration:.1f} 秒")
        print(f"平均速度: {avg_speed / mib:.2f} MB/s")
        print(f"总批次: {batch_count} 批")
        print(f"成功复制: {copied_count} 个文件")
        print(f"跳过文件: {skipped_count} 个")
        print(f"失败文件: {failed_count} 个")
        print(f"累计复制数据: {total_bytes_copied / gib:.2f} GB")
        print("=" * 45)

    def run(self):
        """Run the copy job with stdout mirrored into the log file.

        ``sys.stdout`` is restored and the log file closed when the job
        ends, whether it finishes normally or raises.
        """
        # A previous run() closed the log; reopen it in append mode so the
        # copier can be run again.
        if getattr(self.log_file, 'closed', False):
            self.log_file = open(self.log_file.name, 'a', encoding='utf-8')
        original_stdout = sys.stdout
        sys.stdout = Tee(original_stdout, self.log_file)
        try:
            self._process_files()
        finally:
            sys.stdout = original_stdout
            self.log_file.close()
def main():
    """Copy the configured source into the target using throttled batches.

    Edit the values in the configuration section below before running.
    """
    # ========== User configuration ==========
    # Source: a single file or a whole directory.
    source_path = r"D:\摄影作品"
    # Destination directory.
    target_dir = r"Z:\摄影作品"
    # Batch size in GB between pauses; 0 disables the limit.
    limit_gb = 15.0
    # Seconds to pause once a batch reaches the limit.
    pause_seconds = 300
    # ========================================
    FileCopier(
        source=source_path,
        target=target_dir,
        limit_gb=limit_gb,
        pause_seconds=pause_seconds,
    ).run()


if __name__ == "__main__":
    main()