Python交互式解释器自动补全
Python将工作分布到多个线程

Python将工作分布到多个进程

erhuabushuo posted @ 2012年3月01日 03:41 in Python , 2147 阅读

下面的示例在指定的文件或目录(使用 -r 选项时递归子目录)中查找包含给定字符串的文件,并输出这些文件的文件名。该示例通过 subprocess 模块启动多个子进程来并行执行搜索。

grepword-p.py

#!/usr/bin/env python3

import optparse
import os
import subprocess
import sys

def main():
    """Split the files to search among child processes and wait for them.

    The command line is parsed with parse_options() and expanded into a
    flat file list with get_files().  Each child is started with this
    interpreter running "grepword-p-child.py" (located next to this script)
    and receives, on its standard input, the search word followed by its
    share of the file names, one per line, UTF-8 encoded.  In debug mode
    each child additionally gets its 1-based sequence number as an argument.
    """
    opts, word, args = parse_options()
    filelist = get_files(args, opts.recurse)
    child_script = os.path.join(os.path.dirname(__file__),
                                "grepword-p-child.py")

    total = len(filelist)
    # The first child takes the remainder on top of its even share, so
    # every file is assigned even when the count does not divide evenly.
    share, remainder = divmod(total, opts.count)
    begin, stop = 0, share + remainder

    children = []
    worker_id = 1
    while begin < total:
        argv = [sys.executable, child_script]
        if opts.debug:
            argv.append(str(worker_id))
        proc = subprocess.Popen(argv, stdin=subprocess.PIPE)
        children.append(proc)
        proc.stdin.write(word.encode("utf8") + b"\n")
        proc.stdin.writelines(name.encode("utf8") + b"\n"
                              for name in filelist[begin:stop])
        # Closing stdin signals end-of-input so the child can start work.
        proc.stdin.close()
        worker_id += 1
        begin, stop = stop, stop + share

    # Wait for the children, most recently started first.
    while children:
        children.pop().wait()
	
def parse_options():
    """Parse and validate the command line (read from sys.argv).

    Returns:
        A 3-tuple ``(opts, word, paths)``: the optparse values object
        (attributes ``count``, ``recurse`` and ``debug``), the word to
        search for, and the list of file/directory names given after it.

    Raises:
        SystemExit: via ``parser.error`` when the word or the paths are
            missing, when no path is a regular file and -r was not given,
            or when the process count is outside 1..20.
    """
    parser = optparse.OptionParser(
        usage=("usage: %prog [options] word name1 "
               "[name2 [... nameN]]\n\n"
               "names are filenames or paths; paths only "
               "make sense with the -r option set"))
    parser.add_option("-p", "--processes", dest="count", default=7,
                      type="int",
                      help=("the number of child processes to use (1..20) "
                            "[default %default]"))
    parser.add_option("-r", "--recurse", dest="recurse",
                      default=False, action="store_true",
                      help="recurse into subdirectories")
    parser.add_option("-d", "--debug", dest="debug", default=False,
                      action="store_true")
    opts, args = parser.parse_args()

    # parser.error() prints the message and exits, so each check below is
    # only reached when the previous ones passed.
    if not args:
        parser.error("a word and at least one path must be specified")
    if len(args) == 1:
        parser.error("at least one path must be specified")

    word, paths = args[0], args[1:]
    # Only the names after the word are search targets; the word itself
    # must not satisfy the "is a file" requirement even if a file with
    # that name happens to exist.
    if not opts.recurse and not any(os.path.isfile(p) for p in paths):
        parser.error("at least one file must be specified; or use -r")
    if not 1 <= opts.count <= 20:
        parser.error("process count must be 1..20")
    return opts, word, paths
	
def get_files(args, recurse):
    """Expand the given names into a flat list of file paths.

    Names that are regular files are kept as they are.  Any other name is
    ignored unless *recurse* is true, in which case it is walked with
    os.walk() and every file found beneath it is added.
    """
    found = []
    for path in args:
        if os.path.isfile(path):
            found.append(path)
            continue
        if not recurse:
            continue
        for dirpath, _subdirs, names in os.walk(path):
            found.extend(os.path.join(dirpath, name) for name in names)
    return found
	
# Run only when executed as a script, so importing this module does not
# parse the command line or spawn child processes as a side effect.
if __name__ == "__main__":
    main()

 

grepword-p-child.py

#!/usr/bin/env python3
import codecs
import sys

BLOCK_SIZE = 8000


def file_contains_word(filename, word, block_size=BLOCK_SIZE):
    """Return True if *word* occurs in the file *filename*.

    The file is read in binary blocks of *block_size* bytes and decoded as
    UTF-8, with undecodable bytes ignored.  An incremental decoder is used
    so that a multi-byte character split across two blocks is still decoded,
    and the last ``len(word) - 1`` characters of the text seen so far are
    carried over so that a match straddling a block boundary is found.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    decoder = codecs.getincrementaldecoder("utf8")(errors="ignore")
    overlap = len(word) - 1
    tail = ""
    with open(filename, "rb") as fh:
        while True:
            raw = fh.read(block_size)
            if not raw:
                # End of file reached without a match.  The decoded length
                # is deliberately not used to detect the end: it is smaller
                # than the byte count whenever the block holds multi-byte
                # characters.
                return False
            window = tail + decoder.decode(raw)
            if word in window:
                return True
            tail = window[-overlap:] if overlap > 0 else ""


def main():
    """Search the files named on standard input for a word.

    Standard input holds UTF-8 lines: the word first, then one file name
    per line.  The name of every file containing the word is printed,
    prefixed with "<number>: " when a single command-line argument was
    given.  Files that cannot be read are reported the same way instead of
    aborting the run.
    """
    prefix = "{0}: ".format(sys.argv[1]) if len(sys.argv) == 2 else ""
    lines = sys.stdin.buffer.read().decode("utf8", "ignore").splitlines()
    if not lines:
        # Nothing was sent (not even the word); there is nothing to do.
        return
    word = lines[0].rstrip()

    for filename in lines[1:]:
        filename = filename.rstrip()
        try:
            if file_contains_word(filename, word):
                print("{0}{1}".format(prefix, filename))
        except OSError as err:
            print("{0}{1}".format(prefix, err))


if __name__ == "__main__":
    main()
		
								

接下来将研究如何把工作分配给多个线程执行!


登录 *


loading captcha image...
(输入验证码)
or Ctrl+Enter