Home Backend Development Python Tutorial python 提取文件的小程序

python 提取文件的小程序

Jun 06, 2016 am 11:26 AM
python Extracting files





    2008-10-11 13:15:22
    C:\Program Files\WinRAR



'''
Created on Mar 3, 2009
@author: alex cheng
'''
from xml.dom.minidom import parse, parseString
import datetime
import time
class config(object):
    """Accessor for the settings stored below the ``<config>`` element of an XML file.

    Every getter looks elements up by tag name underneath the first
    ``<config>`` element of the document and returns the concatenated text
    of the matching element(s).
    """

    def __init__(self, configfile):
        """Parse *configfile* and keep its first ``<config>`` element.

        configfile: a file name or open file object accepted by
            ``xml.dom.minidom.parse``.

        Raises IndexError when the document contains no ``<config>`` element.
        """
        dom = parse(configfile)
        self.config_element = dom.getElementsByTagName("config")[0]

    def _firstText(self, tagname):
        """Return the text of the first *tagname* element (IndexError if absent)."""
        element = self.config_element.getElementsByTagName(tagname)[0]
        return self.getText(element.childNodes)

    def _nonEmptyTexts(self, tagname):
        """Return the text of every *tagname* element, skipping empty strings."""
        elements = self.config_element.getElementsByTagName(tagname)
        texts = (self.getText(element.childNodes) for element in elements)
        return [text for text in texts if text != '']

    def getSrcDir(self):
        """Return the text of the first ``<srcdir>`` element."""
        return self._firstText("srcdir")

    def getDestDir(self):
        """Return the text of the first ``<destdir>`` element."""
        return self._firstText("destdir")

    def getNotIncludeDirs(self):
        """Return a list with the text of every non-empty ``<dir>`` element."""
        return self._nonEmptyTexts("dir")

    def getNotIncludeFiles(self):
        """Return a list with the text of every non-empty ``<file>`` element."""
        return self._nonEmptyTexts("file")

    def getText(self, nodeList):
        """Return the concatenated data of all text nodes in *nodeList*.

        Non-text nodes (child elements, comments, ...) are ignored.
        """
        return ''.join(node.data for node in nodeList
                       if node.nodeType == node.TEXT_NODE)

    def getInitTime(self):
        """Return the ``<inittime>`` value as seconds since the epoch (float).

        The element text must look like ``2008-10-11 13:15:22`` and is
        interpreted as local time.  Raises ValueError when the text does not
        match that format.
        """
        timeStr = self._firstText("inittime")
        dt = datetime.datetime.strptime(timeStr, "%Y-%m-%d %H:%M:%S")
        # timetuple() leaves tm_isdst at -1 so mktime() works out DST itself;
        # utctimetuple() on a naive datetime forces tm_isdst=0, which shifts
        # the result by one hour whenever DST is in effect.
        return time.mktime(dt.timetuple())

    def getWinRarDir(self):
        """Return the text of the first ``<rardir>`` element."""
        return self._firstText('rardir')
if __name__ == '__main__':
    # Manual smoke test: print every setting read from config.xml in the
    # current directory.
    c = config('config.xml')
    home = c.getSrcDir()
    print('home is ', home)
    dest = c.getDestDir()
    print('dest is ', dest)
    dirlist = c.getNotIncludeDirs()
    print('not include directory is:')
    for n in dirlist:
        print(n)
    filelist = c.getNotIncludeFiles()
    print('not include files is:')
    for n in filelist:
        print(n)
    inittime = c.getInitTime()
    print('inittime is', inittime)
    rardir = c.getWinRarDir()
    # NOTE(review): the published listing ends at the assignment above; this
    # print is added so the value is shown like the other settings.
    print('rardir is', rardir)



'''
Created on Mar 3, 2009
@author: alex cheng
'''
from config import config
from os import chdir, listdir, makedirs, system, walk, remove, rmdir, unlink, \
removedirs, stat, getcwd
from os.path import abspath, isfile, isdir, join as join_path, exists
from shutil import copy2
from sys import path
import datetime
import re
import time
def getdestdir(dir):
    """Return a not-yet-existing, date-named directory path below *dir*.

    The name is today's date formatted as ``YYYYMMDD`` (for example
    ``20090101``).  If that directory already exists, ``20090101(1)`` is
    tried, then ``20090101(2)`` and so on; the first path that is not an
    existing directory is returned.  Nothing is created on disk.
    """
    strtoday = datetime.datetime.today().strftime('%Y%m%d')
    base = join_path(dir, strtoday)
    candidate = base
    index = 0
    while isdir(candidate):
        index += 1
        candidate = '%s(%d)' % (base, index)
    return candidate
def fetchFiles(srcdir, destdir, ignoredirs, ignorefiles, lasttime=time.mktime(datetime.datetime(2000, 1, 1).utctimetuple())):
    """Recursively copy the files below *srcdir* into the same layout under *destdir*.

    srcdir: source directory.  It is passed to chdir() repeatedly and joined
        with entry names for the ignore checks, so it should be an absolute
        path.
    destdir: destination directory mirroring *srcdir*.
    ignoredirs: collection of directory paths (``srcdir`` joined with the
        sub-directory name) that are skipped together with their content.
    ignorefiles: collection of file paths handed on to copyfiles().
    lasttime: modification-time threshold (seconds since the epoch) handed
        on to copyfiles().

    Side effect: the current working directory is changed.
    """
    chdir(srcdir)  # the relative names returned by listdir('.') resolve here
    subdirs = [name for name in listdir('.') if isdir(name)]
    for subdir in subdirs:
        if join_path(srcdir, subdir) in ignoredirs:
            continue
        fetchFiles(join_path(srcdir, subdir), join_path(destdir, subdir), ignoredirs, ignorefiles, lasttime)
    # The recursive calls leave the working directory inside a descendant;
    # go back so copyfiles() sees srcdir as the current directory.
    chdir(srcdir)
    copyfiles(srcdir, destdir, ignorefiles, lasttime)
def copyfiles(srcdir, destdir, ignorefiles, lasttime):
    """Copy the regular files directly inside *srcdir* to *destdir*.

    srcdir: directory whose files are copied (sub-directories are skipped).
    destdir: target directory; created on demand right before the first
        file is copied into it, so it is not created when nothing qualifies.
    ignorefiles: collection of file paths (``srcdir`` joined with the file
        name) that must not be copied.
    lasttime: files whose modification time is older than this value
        (seconds since the epoch) are skipped.

    Raises Exception when the destination directory cannot be created or a
    file cannot be copied; the underlying OSError is chained as the cause.
    """
    for file in listdir(srcdir):
        srcfile = join_path(srcdir, file)
        # Work with paths joined to srcdir so the result does not depend on
        # the current working directory.
        if not isfile(srcfile):
            continue
        if stat(srcfile).st_mtime < lasttime:
            continue
        if srcfile in ignorefiles:
            continue
        if not isdir(destdir):
            try:
                makedirs(destdir)
            except OSError as err:
                raise Exception('failed create directory: ' + destdir) from err
            print('success create directory:', destdir)
        destfile = join_path(destdir, file)
        try:
            copy2(srcfile, destfile)
        except OSError as err:
            raise Exception('failed copy file from ' + join_path(srcdir, file) + ' to ' + join_path(destdir, file)) from err
        print('success copy file from', join_path(srcdir, file), 'to', join_path(destdir, file))

def tarfiles(dir, todir, winrardir, tarfilename):
    """Pack everything inside *dir* into the archive ``todir\\tarfilename``.

    dir: directory whose content is archived (``rar a -r <archive> *.*`` is
        run with this directory as its working directory).
    todir: directory that receives the archive.
    winrardir: directory containing ``rar.exe``.
    tarfilename: file name of the archive.

    Prints a message telling whether the archive command succeeded; returns
    None in every case.
    """
    # Local import: subprocess is not among the module's top-level imports.
    import subprocess

    if not isdir(dir):
        print('the directory', dir, 'not exist')
        return
    rarexe = winrardir + '\\rar.exe'
    archive = todir + '\\' + tarfilename
    try:
        # An argument list needs no manual quoting, so paths containing
        # spaces (or shell metacharacters) reach rar.exe unchanged.
        status = subprocess.call([rarexe, 'a', '-r', archive, '*.*'], cwd=dir)
    except OSError:
        # rar.exe could not be started at all; report it like a failed run.
        status = -1
    if status == 0:
        print('success tar files')
    else:
        print('failed tar files')

def removeDir(dir_file, currentdir):
    """Delete *dir_file*, which may be a directory tree or a single file.

    dir_file: path to remove; nothing happens when it does not exist.
    currentdir: existing directory that becomes the working directory
        before the removal; when it is not a directory nothing is removed.
    """
    if not isdir(currentdir):
        return
    # NOTE(review): switching to currentdir first keeps the process from
    # sitting inside the tree it is about to delete (which would make the
    # final rmdir fail on Windows) -- confirm this matches the original intent.
    chdir(currentdir)
    if not exists(dir_file):
        return
    if not isdir(dir_file):
        remove(dir_file)
        return
    # Bottom-up walk: every directory is already empty when rmdir reaches it.
    for root, dirs, files in walk(dir_file, topdown=False):
        for name in files:
            remove(join_path(root, name))
        for name in dirs:
            rmdir(join_path(root, name))
    rmdir(dir_file)  # remove the main dir
def getlasttime():
    """Return the most recent fetch time recorded in ``C_UPGRADETIME.txt``.

    The file is looked up in the directory named by ``sys.path[0]``.  Its
    last non-blank line must look like ``2009-03-03 10:00:00`` and is
    interpreted as local time.

    Returns the time as seconds since the epoch (float), or 0 when the file
    is missing, contains no time stamp, or the stamp cannot be parsed.
    """
    mypath = abspath(path[0])  # directory taken from sys.path[0]
    file = join_path(mypath, 'C_UPGRADETIME.txt')
    if not isfile(file):
        return 0
    with open(file, 'r') as f:
        # Strip line ends and skip blank lines so a trailing newline does
        # not make strptime() reject an otherwise valid stamp.
        stamps = [line.strip() for line in f if line.strip()]
    if not stamps:
        return 0
    try:
        dt = datetime.datetime.strptime(stamps[-1], "%Y-%m-%d %H:%M:%S")
    except ValueError:
        print('failed to get last modify time from txt file')
        return 0
    # timetuple() keeps tm_isdst=-1 so mktime() resolves DST on its own;
    # utctimetuple() would force tm_isdst=0 and be an hour off during DST.
    return time.mktime(dt.timetuple())
def registtime():
    """Append the current local time to ``C_UPGRADETIME.txt``.

    The file lives in the directory named by ``sys.path[0]`` and is created
    when missing.  Each call appends a newline followed by a
    ``YYYY-MM-DD HH:MM:SS`` stamp.
    """
    nowstr = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    mypath = abspath(path[0])  # directory taken from sys.path[0]
    # The with-block closes the file so the stamp is flushed to disk
    # deterministically.
    with open(join_path(mypath, 'C_UPGRADETIME.txt'), 'a') as f:
        f.write('\n' + nowstr)
def main():
    """Build a full archive and an incremental archive of the source tree.

    Settings are read from ``config.xml`` in the current directory.  Two
    passes are made into a fresh dated destination directory:

    1. every file is copied to ``temp`` and packed as
       ``CargillUpdate_ALL.rar``;
    2. only files modified since the last recorded run (or since the
       configured init time when no run is recorded) are copied to
       ``temp2`` and packed as ``CargillUpdate.rar``.

    Each temporary directory is removed after packing, and the current time
    is recorded at the end as the new "last run" time.
    """
    c = config('config.xml')
    home = c.getSrcDir()
    dest = c.getDestDir()
    ignoreDirs = c.getNotIncludeDirs()
    ignoreFiles = c.getNotIncludeFiles()
    winRarDir = c.getWinRarDir()

    dest = getdestdir(dest)  # get current dest directory

    print ('copy all files to the temp directory ignore last fetch time')
    fetchFiles(home, join_path(dest, 'temp'), ignoreDirs, ignoreFiles)

    print('tar the all files')
    tarfiles(join_path(dest, 'temp'), dest, winRarDir, 'CargillUpdate_ALL.rar')

    print('program sleep 20 seconds to finish the tar thread')
    # NOTE(review): the pause announced above is missing from the published
    # listing; it is reinstated so the behaviour matches the message.
    time.sleep(20)

    print('remove the temp directory...')
    removeDir(join_path(dest, 'temp'), dest)
    print('success remove the temp directory')

    lasttime = getlasttime()  # get last modify time from txt files
    if lasttime == 0:
        # No usable record of a previous run: fall back to the configured
        # initial time.
        lasttime = c.getInitTime()
    print ('copy all files to the temp2 directory last modify time after last fetch time')
    fetchFiles(home, join_path(dest, 'temp2'), ignoreDirs, ignoreFiles, lasttime)

    print('tar the all files')
    tarfiles(join_path(dest, 'temp2'), dest, winRarDir, 'CargillUpdate.rar')
    print('program sleep 20 seconds to finish the tar thread')
    time.sleep(20)

    print('remove the temp2 directory...')
    removeDir(join_path(dest, 'temp2'), dest)
    print('success remove the temp2 directory')

    registtime()  # regist current time
if __name__ == '__main__':
    # Run the fetch-and-pack workflow only when executed as a script.
    main()

Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn

Hot Article Tags



Easy-to-use and free code editor

SublimeText3 Chinese version

SublimeText3 Chinese version

Chinese version, very easy to use

Zend Studio 13.0.1

Zend Studio 13.0.1

Powerful PHP integrated development environment

Dreamweaver CS6

Dreamweaver CS6

Visual web development tools

SublimeText3 Mac version

SublimeText3 Mac version

God-level code editing software (SublimeText3)

How to download deepseek Xiaomi How to download deepseek Xiaomi Feb 19, 2025 pm 05:27 PM

How to download deepseek Xiaomi

What are the advantages and disadvantages of templating? What are the advantages and disadvantages of templating? May 08, 2024 pm 03:51 PM

What are the advantages and disadvantages of templating?

Google AI announces Gemini 1.5 Pro and Gemma 2 for developers Google AI announces Gemini 1.5 Pro and Gemma 2 for developers Jul 01, 2024 am 07:22 AM

Google AI announces Gemini 1.5 Pro and Gemma 2 for developers

For only $250, Hugging Face's technical director teaches you how to fine-tune Llama 3 step by step For only $250, Hugging Face's technical director teaches you how to fine-tune Llama 3 step by step May 06, 2024 pm 03:52 PM

For only $250, Hugging Face's technical director teaches you how to fine-tune Llama 3 step by step

Share several .NET open source AI and LLM related project frameworks Share several .NET open source AI and LLM related project frameworks May 06, 2024 pm 04:43 PM

Share several .NET open source AI and LLM related project frameworks

A complete guide to golang function debugging and analysis A complete guide to golang function debugging and analysis May 06, 2024 pm 02:00 PM

A complete guide to golang function debugging and analysis

How do you ask him deepseek How do you ask him deepseek Feb 19, 2025 pm 04:42 PM

How do you ask him deepseek

How to save the evaluate function How to save the evaluate function May 07, 2024 am 01:09 AM

How to save the evaluate function

See all articles