用Python列出指定目录下的所有文件并保存记录
要说好用的工具语言,Python 绝对算得上首选。我之前曾写过用来批量修改文件后缀名的脚本,今天想为早期静态站点存档的 HTML 文件建立 sitemap 文件,于是便有了这样的需求:
将指定目录下(包含子目录)的所有文件列出来,并筛选出后缀名为 .htm、.html 的文件,保存到 txt 文件中。下面为全部实现代码:
import os
def getListOfFiles(dirName):
    """Return the paths of all files in the directory tree rooted at dirName.

    Every entry reported by ``os.listdir`` is joined onto ``dirName``.
    Entries that ``os.path.isdir`` reports as directories are scanned
    recursively; every other entry is collected as a file path.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        A list of paths, each prefixed with ``dirName``. The contents of a
        sub-directory appear at the position of that sub-directory in the
        listing; the sub-directory itself is not included.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``dirName`` (or a
            sub-directory) cannot be listed.
    """
    allFiles = []
    for entry in os.listdir(dirName):
        fullPath = os.path.join(dirName, entry)
        # NOTE: os.path.isdir follows symbolic links, so a link that points
        # back up the tree would recurse until RecursionError.
        if os.path.isdir(fullPath):
            # Extend in place rather than rebuilding the list with ``+``,
            # which copied every accumulated path at each directory level.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles
def main(dirName='Path/to/Directory', outputPath='files.txt'):
    """Record every .htm/.html file found under dirName in a text file.

    The directory tree is scanned with ``getListOfFiles``; each path whose
    extension is exactly ``.htm`` or ``.html`` (case-sensitive) is appended
    to ``outputPath``, one path per line.

    Args:
        dirName: Root directory to scan. Defaults to the placeholder path
            used by the original script; replace it with a real directory.
        outputPath: Text file that receives the matching paths. It is opened
            in append mode, so running the script again adds to existing
            content instead of replacing it.
    """
    # Built once instead of on every loop iteration.
    extensions = ('.htm', '.html')

    matches = [
        path
        for path in getListOfFiles(dirName)
        if os.path.splitext(path)[1] in extensions
    ]

    # Open the output once for the whole batch instead of once per match.
    # Skipping the open when nothing matched keeps the earlier behaviour of
    # not creating the file in that case.
    if matches:
        # Explicit UTF-8 so non-ASCII file names do not depend on the
        # platform's default encoding.
        with open(outputPath, 'a', encoding='utf-8') as f:
            f.writelines(path + '\n' for path in matches)

    print("*********Done*******")

    # Alternative traversal: os.walk(dirName) yields
    # (dirpath, dirnames, filenames) tuples, so joining dirpath with each
    # name in filenames lists the same tree without explicit recursion.
# Run the scan only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()