Python将工作分布到多个进程
erhuabushuo
posted @ 2012年3月01日 03:41
in Python
, 2147 阅读
下面的示例在给定的文件或目录(可选择递归进入子目录)中查找包含指定字符串的文件,并输出这些文件的文件名。该示例通过 subprocess 模块启动多个子进程,把待搜索的文件分配给各个子进程并行处理。
grepword-p.py
#!/usr/bin/env python3
"""Search files for a word, spreading the work over several child processes.

The parent collects the list of files to search, splits it into slices and
starts one ``grepword-p-child.py`` process per slice.  Each child receives the
word and its filenames on standard input (one item per line, UTF-8 encoded)
and writes its results directly to the inherited standard output.
"""
import optparse
import os
import subprocess
import sys


def main():
    """Parse the command line, start the child processes and wait for them."""
    # The child script is expected to live next to this file.
    child = os.path.join(os.path.dirname(__file__), "grepword-p-child.py")
    opts, word, args = parse_options()
    filelist = get_files(args, opts.recurse)
    files_per_process = len(filelist) // opts.count
    # The first slice additionally takes the remainder of the division, so
    # every file is handed to exactly one child.
    start, end = 0, files_per_process + (len(filelist) % opts.count)
    number = 1

    pipes = []
    while start < len(filelist):
        command = [sys.executable, child]
        if opts.debug:
            # In debug mode the child gets its ordinal as an extra argument.
            command.append(str(number))
        pipe = subprocess.Popen(command, stdin=subprocess.PIPE)
        pipes.append(pipe)
        # Protocol: first line is the word, every following line a filename.
        pipe.stdin.write(word.encode("utf8") + b"\n")
        for filename in filelist[start:end]:
            pipe.stdin.write(filename.encode("utf8") + b"\n")
        # Closing stdin signals end-of-input to the child.
        pipe.stdin.close()
        number += 1
        start, end = end, end + files_per_process

    # Do not exit before every child has finished.
    for pipe in pipes:
        pipe.wait()


def parse_options():
    """Parse and validate ``sys.argv``.

    Returns a 3-tuple ``(opts, word, paths)`` where ``opts`` carries the
    attributes ``count``, ``recurse`` and ``debug``, ``word`` is the first
    positional argument and ``paths`` the remaining ones.  Invalid input is
    reported through ``parser.error``, which terminates the program.
    """
    parser = optparse.OptionParser(
        usage=("usage: %prog [options] word name1 "
               "[name2 [... nameN]]\n\n"
               "names are filenames or paths; paths only "
               "make sense with the -r option set"))
    parser.add_option("-p", "--processes", dest="count", default=7,
                      type="int",
                      help=("the number of child processes to use (1..20) "
                            "[default %default]"))
    parser.add_option("-r", "--recurse", dest="recurse",
                      default=False, action="store_true",
                      help="recurse into subdirectories")
    parser.add_option("-d", "--debug", dest="debug", default=False,
                      action="store_true")
    opts, args = parser.parse_args()
    if len(args) == 0:
        parser.error("a word and at least one path must be specified")
    elif len(args) == 1:
        parser.error("at least one path must be specified")
    # Without -r directories are skipped, so at least one real file is needed.
    if not opts.recurse and not any(os.path.isfile(arg) for arg in args):
        parser.error("at least one file must be specified; or use -r")
    if not (1 <= opts.count <= 20):
        parser.error("process count must be 1..20")
    return opts, args[0], args[1:]


def get_files(args, recurse):
    """Return the list of files named by *args*.

    Paths that are regular files are taken as they are.  Any other path is
    walked with ``os.walk`` when *recurse* is true and ignored otherwise.
    """
    filelist = []
    for path in args:
        if os.path.isfile(path):
            filelist.append(path)
        elif recurse:
            for root, dirs, files in os.walk(path):
                filelist.extend(os.path.join(root, filename)
                                for filename in files)
    return filelist


# Only run the program when executed as a script, not when imported.
if __name__ == "__main__":
    main()
grepword-p-child.py
#!/usr/bin/env python3
"""Child worker: report which of the given files contain a word.

Standard input carries UTF-8 text, one item per line: the first line is the
word to look for, every following line is a filename.  For each file that
contains the word its name is printed.  If exactly one command line argument
is given, every output line is prefixed with ``"<argument>: "``.
"""
import codecs
import sys

BLOCK_SIZE = 8000


def contains_word(filename, word, block_size=BLOCK_SIZE):
    """Return True if the file *filename* contains *word*.

    The file is read in binary blocks of *block_size* bytes and decoded as
    UTF-8, ignoring undecodable bytes.  Raises ``OSError`` if the file cannot
    be opened or read.
    """
    # An incremental decoder keeps a multi-byte character that is split
    # across two blocks intact instead of dropping both halves.
    decoder = codecs.getincrementaldecoder("utf8")("ignore")
    # Number of trailing characters to carry over so that a match straddling
    # a block boundary is still found.
    keep = max(len(word) - 1, 0)
    tail = ""
    with open(filename, "rb") as fh:
        while True:
            raw = fh.read(block_size)
            if not raw:
                return False
            window = tail + decoder.decode(raw)
            if word in window:
                return True
            tail = window[-keep:] if keep else ""


def main():
    """Read the word and the filenames from stdin and print the matches."""
    number = "{0}: ".format(sys.argv[1]) if len(sys.argv) == 2 else ""
    stdin = sys.stdin.buffer.read()
    lines = stdin.decode("utf8", "ignore").splitlines()
    if not lines:
        # Nothing was sent; there is no word to search for.
        return
    word = lines[0].rstrip()
    for filename in lines[1:]:
        filename = filename.rstrip()
        try:
            if contains_word(filename, word):
                print("{0}{1}".format(number, filename))
        except EnvironmentError as err:
            # Report unreadable files and carry on with the remaining ones.
            print("{0}{1}".format(number, err))


if __name__ == "__main__":
    main()
接下来将研究将工作分配给多线程执行!