建议在文件复制、移动、上传时，增加异常处理

费牛的牛 · 2025-5-26 16:29:20

如图一所示，当我把文件从 NTFS 文件系统复制到 ntrfs 文件系统时出现报错，共 900 多项，而且报错列表没有列全；由于文件太多，这种报错根本没法逐个处理。

优化建议：

1.在文件设置中，增加文件名超长时的异常处理偏好（图二）

2.文件任务的查看详情能列全内容（分页，无图，应该好理解）

3.文件任务详情增加异常处理选择：忽略、截断（图四）

4.操作文件前，如果偏好设置为“询问”，则弹出提示，让用户选择忽略或截断（图三）

费牛的牛 · 2025-5-26 16:49:19

以下代码供遇到相同问题的朋友使用（需要有 Python 环境）：它会自动复制缺失的文件，文件名超长时可自动截断。其中的目录都是通过 SMB 映射到一台 Windows 机器上的，然后在这台机器上用 Python 运行脚本。

import os
import shutil
import logging
import sys
from pathlib import Path
from datetime import datetime

# Configuration parameters - adjust these to your environment.
SOURCE_DIRECTORY = Path("D:/test1")           # Source directory; its files are the ones checked/copied
TARGET_COMPARISON_DIRECTORY = Path("X:/test1")  # Directory compared against the source to find missing files
COPY_TARGET_DIRECTORY = Path("X:/test1")      # Destination root for the files that are copied
MISSING_FILE_LOG = Path("遗漏的文件.txt")        # Text file that receives one missing source path per line
SHOULD_TRUNCATE_FILENAME = True                  # Whether over-long path components are truncated
SHOULD_COPY_TO_TARGET = True                     # Whether files are actually copied (False = only log them)
# Both limits below are compared against UTF-8 byte lengths, not character counts.
# NOTE(review): the original comment called 4096 the "Windows standard"; classic
# Windows MAX_PATH is 260, so this value presumably targets the remote file
# system behind the SMB share - confirm the intended limit.
MAX_WINDOWS_PATH_LENGTH = 4096                    # Maximum length of a full target path
MAX_WINDOWS_FILENAME_LENGTH = 255                 # Maximum length of a single path component
SHOULD_PRINT_FILENAME_STATS = True               # Whether truncation/renaming details are logged
SHOULD_CHECK_MISSING = True                      # Whether to diff against the target; False treats every source file as missing

# Configure logging: INFO level, timestamped messages written to stdout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
LOGGER = logging.getLogger(__name__)

def get_file_list(root_directory):
    """Return the set of all files below ``root_directory`` as relative paths.

    The tree is walked recursively with ``os.walk``. Each file is reported as
    a string path relative to ``root_directory``; directories themselves are
    not included.
    """
    return {
        str((Path(current_directory) / file_name).relative_to(root_directory))
        for current_directory, _subdirectories, file_names in os.walk(root_directory)
        for file_name in file_names
    }

def get_byte_length(input_string):
    """Return the UTF-8 byte length of ``input_string``.

    File names coming from the operating system may contain lone surrogate
    code points, which strict UTF-8 encoding rejects. They are encoded with
    the ``surrogatepass`` error handler (3 bytes each) so the result is a real
    byte count rather than a character-count fallback, and no blanket
    ``except`` is needed.
    """
    return len(input_string.encode('utf-8', errors='surrogatepass'))

def truncate_path_component(path_component, max_byte_length):
    """Cut a file or directory name down to at most ``max_byte_length`` bytes.

    The name is returned unchanged when it already fits. Otherwise whole
    characters are kept from the front until adding the next one would
    exceed the byte budget, so a multi-byte character is never split.
    """
    if get_byte_length(path_component) <= max_byte_length:
        return path_component

    kept_characters = []
    used_bytes = 0
    for character in path_component:
        used_bytes += get_byte_length(character)
        if used_bytes > max_byte_length:
            # This character would overflow the budget; stop before it.
            break
        kept_characters.append(character)

    return "".join(kept_characters)

def _truncate_component_keeping_extension(component, max_byte_length):
    """Shorten one path component to ``max_byte_length`` bytes, keeping its extension if it fits."""
    if get_byte_length(component) <= max_byte_length:
        return component

    file_extension = Path(component).suffix
    if file_extension and component.endswith(file_extension):
        # Budget is computed in bytes: the extension may be non-ASCII.
        stem_budget = max_byte_length - get_byte_length(file_extension)
        if stem_budget > 0:
            truncated_stem = truncate_path_component(
                component[:-len(file_extension)], stem_budget
            )
            if truncated_stem:
                return truncated_stem + file_extension

    # No extension, or the "extension" is too long to preserve (e.g. a long
    # dotted name): truncate the component as a whole.
    return truncate_path_component(component, max_byte_length)


def _split_file_extension(file_name):
    """Split ``file_name`` at its last dot into (base, extension); extension is '' if there is none."""
    dot_index = file_name.rfind('.')
    if 0 < dot_index < len(file_name) - 1:
        return file_name[:dot_index], file_name[dot_index:]
    return file_name, ""


def ensure_path_length(source_file_relative_path, target_base_directory):
    """Build the target path for a file, truncating it to respect length limits.

    Args:
        source_file_relative_path: Path of the file relative to the source root.
        target_base_directory: ``Path`` of the directory the file is copied under.

    Returns:
        ``target_base_directory`` joined with the (possibly shortened) relative
        path. When ``SHOULD_TRUNCATE_FILENAME`` is false the path is returned
        without any truncation.

    Truncation happens in three stages, all measured in UTF-8 bytes:

    1. Every component of the relative path is limited to
       ``MAX_WINDOWS_FILENAME_LENGTH`` bytes, preserving the extension.
    2. If the full path still exceeds ``MAX_WINDOWS_PATH_LENGTH``, the
       directory components of the relative path are processed from the
       deepest one upwards: a component longer than 10 bytes is cut to half
       its size, and if the path is still too long the component is dropped.
       Only components of the relative path are touched, never those of
       ``target_base_directory``.
    3. If stage 2 cannot make the path fit, the file name is truncated inside
       the directory produced by stage 1.
    """
    if not SHOULD_TRUNCATE_FILENAME:
        return target_base_directory / source_file_relative_path

    # Stage 1: limit every component (directory names and the file name).
    relative_components = []
    for part in Path(source_file_relative_path).parts:
        component = str(part)
        fitted_component = _truncate_component_keeping_extension(
            component, MAX_WINDOWS_FILENAME_LENGTH
        )
        relative_components.append(fitted_component)

        if fitted_component != component and SHOULD_PRINT_FILENAME_STATS:
            LOGGER.info(f"组件截断: {get_byte_length(component)} 字节 -> {get_byte_length(fitted_component)} 字节")
            LOGGER.info(f"原始组件: {component}")
            LOGGER.info(f"截断后组件: {fitted_component}")

    processed_path = target_base_directory.joinpath(*relative_components)

    full_path_byte_length = get_byte_length(str(processed_path))
    if full_path_byte_length <= MAX_WINDOWS_PATH_LENGTH or not relative_components:
        return processed_path

    if SHOULD_PRINT_FILENAME_STATS:
        LOGGER.info(f"完整路径超长: {full_path_byte_length} 字节 (限制: {MAX_WINDOWS_PATH_LENGTH} 字节)")

    # Stage 2: shorten, then drop, directory components from the deepest up.
    *directory_components, file_name = relative_components
    while directory_components:
        last_directory_component = directory_components[-1]
        last_byte_length = get_byte_length(last_directory_component)
        if last_byte_length > 10:  # only shorten reasonably long components
            directory_components[-1] = truncate_path_component(
                last_directory_component, last_byte_length // 2
            )
            candidate_path = target_base_directory.joinpath(*directory_components, file_name)
            candidate_byte_length = get_byte_length(str(candidate_path))
            if candidate_byte_length <= MAX_WINDOWS_PATH_LENGTH:
                if SHOULD_PRINT_FILENAME_STATS:
                    LOGGER.info(f"路径整体截断成功: {full_path_byte_length} 字节 -> {candidate_byte_length} 字节")
                return candidate_path

        # Shortening was not enough (or not attempted): drop this directory.
        directory_components.pop()

    # Stage 3: directories could not be shortened enough; truncate the file
    # name inside the directory computed in stage 1.
    base_file_name, file_extension = _split_file_extension(file_name)
    current_parent_directory = processed_path.parent
    parent_directory_byte_length = get_byte_length(str(current_parent_directory))
    # One extra byte accounts for the path separator before the file name.
    allowed_file_name_byte_length = MAX_WINDOWS_PATH_LENGTH - parent_directory_byte_length - 1

    if allowed_file_name_byte_length > 0:
        # Leave room for a numeric de-duplication suffix such as "_9999".
        reserved_byte_length = 6
        max_base_file_name_length = max(
            1,
            allowed_file_name_byte_length - get_byte_length(file_extension) - reserved_byte_length,
        )

        truncated_base_file_name = truncate_path_component(base_file_name, max_base_file_name_length)
        new_file_name = truncated_base_file_name + file_extension
        processed_path = current_parent_directory / new_file_name

        if SHOULD_PRINT_FILENAME_STATS:
            LOGGER.info(f"文件名二次截断: {get_byte_length(file_name)} 字节 -> {get_byte_length(new_file_name)} 字节")
            LOGGER.info(f"原始文件名: {file_name}")
            LOGGER.info(f"二次截断后文件名: {new_file_name}")

    if get_byte_length(str(processed_path)) > MAX_WINDOWS_PATH_LENGTH:
        # Nothing more can be cut; report it instead of failing silently.
        LOGGER.warning(f"路径仍然超长，无法继续截断: {processed_path}")

    return processed_path

def handle_duplicate_filename(target_directory, original_file_name):
    """Return a path in ``target_directory`` that does not exist yet.

    A numeric suffix (``_1``, ``_2``, ...) is inserted between the base name
    and the extension of ``original_file_name`` until an unused name is found.
    The base name is truncated as needed so the complete file name never
    exceeds ``MAX_WINDOWS_FILENAME_LENGTH`` UTF-8 bytes.

    Args:
        target_directory: ``Path`` of the directory the file will be placed in.
        original_file_name: The file name that already exists there.

    Returns:
        ``Path`` of the first candidate that does not exist.
    """
    # Split the name at its last dot (a leading dot is not an extension).
    base_file_name = original_file_name
    file_extension = ""
    dot_index = original_file_name.rfind('.')
    if dot_index > 0:
        base_file_name = original_file_name[:dot_index]
        file_extension = original_file_name[dot_index:]

    # Pre-truncate the base so that suffixes up to "_9999" fit without
    # further work (6 bytes are reserved).
    max_base_file_name_length = MAX_WINDOWS_FILENAME_LENGTH - get_byte_length(file_extension) - 6
    truncated_base_file_name = truncate_path_component(base_file_name, max(max_base_file_name_length, 1))

    counter = 1
    while True:
        suffix = f"_{counter}"
        # Byte budget left for the base once this suffix and the extension are
        # added; only longer suffixes than the reserved space shrink the base.
        base_byte_budget = MAX_WINDOWS_FILENAME_LENGTH - get_byte_length(suffix + file_extension)
        candidate_base_file_name = truncate_path_component(
            truncated_base_file_name, max(1, base_byte_budget)
        )
        new_file_name = candidate_base_file_name + suffix + file_extension
        new_file_path = target_directory / new_file_name

        if not new_file_path.exists():
            if SHOULD_PRINT_FILENAME_STATS:
                LOGGER.info("文件名重复处理:")
                LOGGER.info(f"原始文件名: {original_file_name}")
                LOGGER.info(f"生成唯一文件名: {new_file_name}")
            return new_file_path
        counter += 1

def copy_missing_files(missing_file_relative_paths):
    """Log every missing file and, if enabled, copy it to the target directory.

    Each source path is written to ``MISSING_FILE_LOG`` (one per line). When
    ``SHOULD_COPY_TO_TARGET`` is true the file is then copied below
    ``COPY_TARGET_DIRECTORY``, using a length-checked target path and a
    numeric suffix if that target already exists. A failure to copy one file
    is logged and counted; it does not stop the remaining files.

    Args:
        missing_file_relative_paths: Collection of paths relative to
            ``SOURCE_DIRECTORY``.
    """
    missing_file_count = len(missing_file_relative_paths)
    successful_copy_count = 0
    failed_copy_count = 0

    try:
        with open(MISSING_FILE_LOG, 'w', encoding='utf-8') as writer:
            # Sorted so that the log order, and which file receives a "_N"
            # suffix when truncated names collide, is the same on every run.
            for relative_path in sorted(missing_file_relative_paths):
                source_file_path = SOURCE_DIRECTORY / relative_path
                target_file_path = ensure_path_length(relative_path, COPY_TARGET_DIRECTORY)

                # Record the missing file.
                writer.write(f"{source_file_path}\n")
                LOGGER.info(f"遗漏文件: {source_file_path}")

                if not SHOULD_COPY_TO_TARGET:
                    continue

                try:
                    # Make sure the destination directory exists.
                    target_directory = target_file_path.parent
                    target_directory.mkdir(parents=True, exist_ok=True)

                    # Avoid overwriting an existing file of the same name.
                    if target_file_path.exists():
                        target_file_path = handle_duplicate_filename(target_directory, target_file_path.name)

                    # copy2 also copies file metadata such as timestamps.
                    shutil.copy2(source_file_path, target_file_path)
                    LOGGER.info(f"成功复制: {source_file_path} -> {target_file_path}")
                    successful_copy_count += 1
                except Exception as e:
                    LOGGER.error(f"复制失败: {source_file_path} -> {target_file_path}, 错误: {str(e)}")
                    failed_copy_count += 1
    except Exception as e:
        LOGGER.error(f"写入日志文件失败: {str(e)}")

    LOGGER.info(f"检查完成: 共发现 {missing_file_count} 个缺失文件")
    if SHOULD_COPY_TO_TARGET:
        LOGGER.info(f"复制结果: 成功 {successful_copy_count} 个, 失败 {failed_copy_count} 个")
    # The log file is written in log-only mode as well, so always point to it.
    LOGGER.info(f"遗漏的文件已记录到: {MISSING_FILE_LOG}")

def main():
    """Entry point: determine which source files to process and copy them.

    With ``SHOULD_CHECK_MISSING`` enabled, only files present under
    ``SOURCE_DIRECTORY`` but absent under ``TARGET_COMPARISON_DIRECTORY`` are
    processed; otherwise every source file is. Any exception is logged with
    its traceback instead of being propagated.
    """
    LOGGER.info(f"开始操作，是否检测文件缺失: {SHOULD_CHECK_MISSING}")

    try:
        files_to_process = get_file_list(SOURCE_DIRECTORY)
        if SHOULD_CHECK_MISSING:
            # Keep only the files that the comparison directory lacks.
            files_in_target = get_file_list(TARGET_COMPARISON_DIRECTORY)
            files_to_process = files_to_process - files_in_target

        copy_missing_files(files_to_process)
    except Exception as e:
        LOGGER.error(f"操作失败: {str(e)}", exc_info=True)


if __name__ == "__main__":
    main()

费牛的牛 · 2025-5-30 21:34:23

怎么没有官方回复？

		自动登录	找回密码
密码			立即注册

建议在文件复制、移动、上传时，增加异常处理

本帖子中包含更多资源

浏览过的版块