无用图片扫描、删除脚本(Python)

系统 412 0

使用Python开发检查脚本,原理是:

1. 首先扫描所有特定后缀( '.png', '.jpg', '.jpeg', '.imageset' 等)的图片文件 或 图片文件夹,获取其完整路径,存入 allImageFiles 集合中;

2. 扫描所有代码文件,逐行分析,若有引用 allImageFiles 内图片文件文件名的字符串,则将该字符串对应的图片文件列为有用图片,存入 usedImageFiles 中;

3. 取 allImageFiles 与 usedImageFiles 的差集,即可获得无用图片集 unusedFiles ;

4. 输出无用图片集 unusedFiles 至文本内,并计算总图片大小、无用图片大小;

5. 可选地,可删除所有无用图片文件 及 图片文件夹。

为避免误删,脚本执行结束后,需要对删除的图片文件 review。

            
              # coding=utf-8

import os, sys, codecs, re, shutil

# Python 2 only: switch the interpreter-wide default encoding to UTF-8 so that
# implicit str/unicode conversions of non-ASCII text do not fail. Python 3 has
# neither a reload() builtin nor sys.setdefaultencoding(), so the hack is
# skipped there instead of raising NameError at import time.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2 only
    sys.setdefaultencoding('utf-8')

# Full paths of every image file / image folder found by the scan.
allImageFiles = set()
# Subset of allImageFiles whose image name was found in a scanned source file.
usedImageFiles = set()

# NOTE(review): not referenced by any code visible in this file.
fileCount = 0

# Matches, at the end of a name, a single trailing digit, an "@<digit>x"
# scale marker, or a digit immediately followed by such a scale marker.
# Raw string so the "\d" escapes reach the regex engine untouched.
removeSuffixReg = re.compile(r'\d$|@\dx$|\d@\dx$')


def transferImagefileToImageName(imageFile):
    """Return the base image name for an image file or image folder path.

    The directory part and the extension are dropped, then one trailing
    suffix matched by ``removeSuffixReg`` is removed. Examples::

        upload_failed@1x.png -> upload_failed
        loading3@2x.png      -> loading
        icon.imageset        -> icon

    Args:
        imageFile: path (or bare file name) of an image file or folder.

    Returns:
        The stripped image name as a string.
    """
    filename = os.path.basename(imageFile)
    filenameNoExtension, _ = os.path.splitext(filename)
    return removeSuffixReg.sub('', filenameNoExtension)

def scanAboundentImage(paths):
    """Collect all images under ``paths``, find the used ones, and report.

    Every matching image file or image folder is added to the module-level
    ``allImageFiles`` set, then ``checkIfImageHasUsed`` and
    ``anylasisImageFilesOutput`` are run over the result.

    Args:
        paths: iterable of directory paths to scan.
    """
    def checkImageCallback(fullPath, filename, extension):
        # Only mutates the module-level set, so no ``global`` is required.
        allImageFiles.add(fullPath)

    # iOS-oriented scan: '.png' / '.jpg' / '.jpeg' files are recorded one by
    # one, while a '.imageset' folder is recorded as a single entry and the
    # image files directly inside it are not recorded separately.
    # os.path.splitext() returns extensions with their leading dot, so every
    # entry must carry it: the former 'jpeg' entry could never match a file.
    # (A plain file-only scan is available through travel() instead.)
    travelFilesAndDirs(paths, ['.png', '.jpg', '.jpeg'], ['.imageset'], checkImageCallback)

    checkIfImageHasUsed(paths, allImageFiles)
    anylasisImageFilesOutput()

def anylasisImageFilesOutput():
    """Write the unused-image report and print count and size statistics.

    The unused set is ``allImageFiles - usedImageFiles``. Each unused path
    that still exists is written, one per line and in sorted order, to
    ``uselessImage.txt`` in the current working directory. Afterwards the
    number of scanned and unused entries and their sizes in whole KB are
    printed to standard output.
    """
    unusedFiles = allImageFiles - usedImageFiles

    totalSize = 0
    for aFile in allImageFiles:
        if os.path.exists(aFile):
            totalSize += countSize(aFile)

    unusedSize = 0
    reportPath = os.path.join(os.getcwd(), "uselessImage.txt")
    # ``with`` closes the report even if sizing one of the entries raises.
    with open(reportPath, "w") as outputFile:
        # Sorted so the report is stable between runs and easy to review.
        for unusedFile in sorted(unusedFiles):
            if not os.path.exists(unusedFile):
                continue
            unusedSize += countSize(unusedFile)
            # write() behaves the same on Python 2 and 3, unlike ``print >>``.
            outputFile.write("%s\n" % unusedFile)
            # Deleting the unused file or folder is disabled by default;
            # to enable it, call removeFilesOrDirs(unusedFile) here.

    print('all files count is %s \nunused files count is %s' % (len(allImageFiles), len(unusedFiles)))
    # Floor division keeps whole-KB output on both Python 2 and Python 3.
    print('total size of file is %s KB \nunused file size is %s KB' % (totalSize // 1024, unusedSize // 1024))

def removeFilesOrDirs(path):
    """Delete ``path``: a file is removed, a directory is removed recursively.

    A path that does not exist, or that is neither a regular file nor a
    directory, is left untouched.

    Args:
        path: file or directory path to delete.
    """
    if not os.path.exists(path):
        return
    if os.path.isfile(path):
        os.remove(path)
        return
    if os.path.isdir(path):
        shutil.rmtree(path)

def countSize(path):
    """Return the size in bytes of a file, or of all files under a directory.

    Args:
        path: file or directory path.

    Returns:
        ``os.path.getsize(path)`` when ``path`` is not a directory; otherwise
        the summed size of every file found by walking the directory tree.
    """
    if not os.path.isdir(path):
        return os.path.getsize(path)

    # os.walk yields (dirpath, dirnames, filenames) for the directory itself
    # and for every folder below it; only the file names are needed here.
    return sum(
        os.path.getsize(os.path.join(folder, name))
        for folder, _subFolders, names in os.walk(path)
        for name in names
    )

def checkIfImageHasUsed(paths, imageFiles):
    """Record in ``usedImageFiles`` every image referenced by a source file.

    Each image is searched for as a double quote followed by its image name
    (see ``transferImagefileToImageName``). This is a prefix match: any quoted
    string that merely starts with the name also counts as a reference.
    Files with the extensions '.h', '.m', '.xib' and '.storyboard' under
    ``paths`` are read as UTF-8 and checked line by line.

    Args:
        paths: iterable of directory paths whose source files are scanned.
        imageFiles: iterable of image file / folder paths to look for.
    """
    # Built once here rather than once per source file. Images that share a
    # name (for example scale variants) are grouped so that each search string
    # is tested only once per line.
    codeStringToImageFiles = {}
    for imageFile in imageFiles:
        codeString = "\"%s" % transferImagefileToImageName(imageFile)
        codeStringToImageFiles.setdefault(codeString, []).append(imageFile)

    def travelCallback(fullPath, filename, extension):
        with codecs.open(fullPath, 'r', 'utf-8') as sourceFile:
            for line in sourceFile.readlines():
                # items() works on both Python 2 and 3, unlike iteritems().
                for codeString, matchedFiles in codeStringToImageFiles.items():
                    if codeString in line:
                        # Mutating the module-level set needs no ``global``.
                        usedImageFiles.update(matchedFiles)

    travel(paths, ['.h', '.m', '.xib', '.storyboard'], travelCallback)

def travel(paths, fileExtensions, callback):
    """Walk ``paths`` and invoke ``callback`` for each file with a wanted extension.

    Only files are considered. ``callback`` is called as
    ``callback(fullPath, filename, extension)`` for every existing file whose
    extension (including the leading dot) is in ``fileExtensions``.

    Args:
        paths: iterable of directory paths to walk.
        fileExtensions: container of accepted extensions.
        callback: callable taking (fullPath, filename, extension).
    """
    for rootPath in paths:
        for currentDir, _subDirs, names in os.walk(rootPath):
            for name in names:
                candidate = os.path.join(currentDir, name)
                suffix = os.path.splitext(name)[1]
                if not os.path.exists(candidate):
                    continue
                if suffix in fileExtensions:
                    callback(candidate, name, suffix)


def travelFilesAndDirs(paths, fileExtensions, dirExtensions, callback):
    """Walk ``paths`` and report matching files and matching folders.

    ``callback(fullPath, name, extension)`` is invoked for:

    * every existing file whose extension is in ``fileExtensions``, unless
      the folder that directly contains it has an extension listed in
      ``dirExtensions`` (files inside a matched folder are not reported);
    * every existing folder whose extension is in ``dirExtensions``.

    Args:
        paths: iterable of directory paths to walk.
        fileExtensions: container of accepted file extensions.
        dirExtensions: container of accepted folder extensions.
        callback: callable taking (fullPath, name, extension).
    """
    for rootPath in paths:
        for currentDir, subDirs, names in os.walk(rootPath):
            for name in names:
                candidate = os.path.join(currentDir, name)
                suffix = os.path.splitext(name)[1]
                if not os.path.exists(candidate):
                    continue
                if suffix not in fileExtensions:
                    continue
                if isFileDirHasTargetExtensions(candidate, dirExtensions):
                    continue
                callback(candidate, name, suffix)

            for subDir in subDirs:
                candidate = os.path.join(currentDir, subDir)
                suffix = os.path.splitext(subDir)[1]
                if os.path.exists(candidate) and suffix in dirExtensions:
                    callback(candidate, subDir, suffix)

def isFileDirHasTargetExtensions(path, dirExtensions):
    """Tell whether the folder directly containing ``path`` has a listed extension.

    Only the immediate parent folder name is inspected.

    Args:
        path: path of a file.
        dirExtensions: container of folder extensions (with leading dot).

    Returns:
        True if the parent folder's extension is in ``dirExtensions``.
    """
    containingDir = os.path.dirname(path)
    parentExtension = os.path.splitext(os.path.basename(containingDir))[1]
    return parentExtension in dirExtensions


# Script entry point: every command-line argument is a directory to scan.
# Guarded so that importing this module does not start a scan.
if __name__ == "__main__":
    if sys.argv[1:]:
        scanAboundentImage(sys.argv[1:])
    else:
        # Function-call form prints the same text on Python 2 and Python 3.
        print("Please pass the paths to check as parameters to the script")
            
          

 


更多文章、技术交流、商务合作、联系博主

微信扫码或搜索:z360901061

微信扫一扫加我为好友

QQ号联系: 360901061

您的支持是博主写作最大的动力,如果您喜欢我的文章,感觉我的文章对您有帮助,请扫描上面二维码支持博主1元、2元、5元等您想捐的金额吧,狠狠点击下面给点支持吧

发表我的评论
最新评论 总共0条评论