删除目录重复文件python脚本
#!/usr/bin/python
# -*-coding:utf-8 -*-
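# Batch script: among the files of the given directory that have identical
# content, only the first one encountered keeps its name; every other copy is
# renamed with the prefix "重复文件." (nothing is deleted). File names play no
# part in the comparison, e.g. if a.jpg and b.jpg have the same content, only
# one of them keeps its original name.
# Note: sub-directories are not processed.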
import os
import sys
# Prefix prepended to the name of every file found to be a duplicate.
DUP_PREFIX = '重复文件.'
# Files larger than this many MiB are never used as a comparison reference.
MAX_SIZE_MB = 100
# Number of bytes read per step while comparing two files.
_CHUNK_SIZE = 1024 * 1024


def _same_content(path_a, path_b):
    """Return True when both files hold byte-for-byte identical data."""
    with open(path_a, 'rb') as file_a, open(path_b, 'rb') as file_b:
        while True:
            chunk_a = file_a.read(_CHUNK_SIZE)
            chunk_b = file_b.read(_CHUNK_SIZE)
            if chunk_a != chunk_b:
                return False
            if not chunk_a:
                # Both files reached end-of-file at the same position.
                return True


def mark_duplicates(dir_path, dup_prefix=DUP_PREFIX, max_mb=MAX_SIZE_MB):
    """Rename every duplicate file directly inside *dir_path*.

    Each regular file of the directory is taken in turn as the reference.
    Every other regular file with the same content is renamed to
    ``dup_prefix + original_name``. Sub-directories are not descended into.

    Args:
        dir_path: Directory whose direct children are examined.
        dup_prefix: Text prepended to the name of each duplicate.
        max_mb: Reference files larger than this many MiB are skipped.

    Returns:
        The number of files that were renamed.

    Raises:
        Exception: Any error raised while comparing or renaming is
            re-raised after a short notice has been printed.
    """
    dup_num = 0
    for name in os.listdir(dir_path):
        path = os.path.join(dir_path, name)
        # The entry may already have been renamed while handling an
        # earlier reference file.
        if not os.path.exists(path):
            continue
        if not os.path.isfile(path):
            print('%s 不是文件,跳过' % name)
            continue
        size = os.path.getsize(path)
        if size > 1024 * 1024 * max_mb:
            print('%s 文件大小 %s 已经超过%sM,不处理' % (name, size, max_mb))
            continue
        # List again on every pass so earlier renames are taken into account.
        for other in os.listdir(dir_path):
            if other == name:
                continue
            other_path = os.path.join(dir_path, other)
            # Opening a directory for reading raises, so only regular files
            # are considered as candidates.
            if not os.path.isfile(other_path):
                continue
            try:
                # Files of different sizes cannot be identical; this also
                # bounds how much data is ever read from a candidate.
                if os.path.getsize(other_path) != size:
                    continue
                if not _same_content(path, other_path):
                    continue
                new_name = '%s%s' % (dup_prefix, other)
                new_path = os.path.join(dir_path, new_name)
                # os.rename silently replaces an existing file on POSIX, so
                # refuse to rename onto a name that is already taken.
                if os.path.exists(new_path):
                    print('%s 已存在,跳过重命名 %s' % (new_name, other))
                    continue
                os.rename(other_path, new_path)
            except Exception:
                print('比较内容时出错:')
                raise
            print('重复的文件 %s 被重命名成 %s' % (other, new_name))
            dup_num += 1
    return dup_num


def main(argv):
    """Run the script for the command line *argv* and return the exit status.

    Args:
        argv: Full argument vector; ``argv[1]`` is the directory to process.

    Returns:
        0 on success, 1 when the arguments are missing or not a directory.
    """
    if len(argv) != 2:
        print('请输入要处理目录')
        return 1
    dir_path = os.path.abspath(argv[1])
    if not os.path.isdir(dir_path):
        print('路径必须选择目录类型')
        # An invalid path is an error, so report a non-zero status.
        return 1
    dup_num = mark_duplicates(dir_path)
    print('发现 %s 个重复文件,重复的文件名前将会被追加"%s"' % (dup_num, DUP_PREFIX))
    print('done')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
发表于 2019.12.19 14:50:24