#!/usr/bin/env python2.2
"""Chunkie: retrieve a file through HTTP in parts.

A small program for retrieving a file through HTTP in several byte ranges
at once, much like programs such as Download Accelerator do, but a lot
simpler: this script does nothing more than just that.

The user chooses the number of chunks; every chunk is downloaded by its
own thread into a temporary file and the chunks are concatenated into the
output file once all threads have finished.
"""

import sys
import os
import socket
import re
import time
import base64

# The script was written for Python 2.  The fallbacks below only map the
# renamed standard-library modules so the file also loads on Python 3.
try:
    import thread
except ImportError:
    import _thread as thread

try:
    from Tkinter import *
    from tkMessageBox import showerror, showwarning
    from tkSimpleDialog import askstring, askinteger
    from tkMessageBox import askyesno
    from tkFileDialog import asksaveasfilename
except ImportError:
    from tkinter import *
    from tkinter.messagebox import showerror, showwarning
    from tkinter.simpledialog import askstring, askinteger
    from tkinter.messagebox import askyesno
    from tkinter.filedialog import asksaveasfilename

# NOTE(review): the first value looks like a template placeholder that an
# installer substitutes -- confirm.  The second assignment always wins.
TMPDIR = '[__TMPDIR__]'
TMPDIR = '/tmp/chunkie_partial'

# Type socket.send()/recv() work with: ``str`` on Python 2, ``bytes`` on 3.
_WIRE_TYPE = type(''.encode('ascii'))
_HEADER_END = '\r\n\r\n'.encode('ascii')

_STATUS_RE = re.compile("^HTTP/[0-9/.]+ (.*)")
_URL_RE = re.compile(r"^(?:[a-zA-Z]+://)?([^/:]*)(?::([0-9]*))?/(.*)$")
_ACCEPT_RANGES_RE = re.compile(r"^Accept-Ranges:\s*bytes", re.I | re.M)
_CONTENT_LENGTH_RE = re.compile(r"^Content-Length:\s*([0-9]+)", re.I | re.M)


class HTTPException (Exception):
    """Raised when there's a HTTP error"""


class NumchunkWarning (Exception):
    """Raised when the current number of chunks differs from the one used
    for a partial previous download of the same file
    """


class UrlWarning (Exception):
    """Raised when the URL of the requested file differs from the URL of a
    partially downloaded file with the same name
    """


def _to_wire(text):
    """Return *text* as the string type that socket.sendall() expects."""
    if isinstance(text, _WIRE_TYPE):
        return text
    return text.encode('latin-1')


def _read_headers(sock):
    """Read a HTTP response head from *sock* and return it as text.

    Reads one byte at a time so nothing of the body is consumed.  Stops at
    the blank line that ends the headers, or early when the peer closes
    the connection (the caller's status check then rejects the response).
    """
    raw = ''.encode('ascii')
    while raw[-4:] != _HEADER_END:
        byte = sock.recv(1)
        if not byte:
            break
        raw += byte
    if isinstance(raw, str):
        return raw
    return raw.decode('latin-1')


def _raise_http_error(response):
    """Raise HTTPException describing the status line of *response*."""
    match = _STATUS_RE.search(response)
    if not match:
        raise HTTPException("Unknown response")
    raise HTTPException("Error reported by server: %s"
                        % match.group(1).strip())


class chunked_http_loader:
    """Download one file over HTTP in ``numchunks`` parallel byte ranges.

    Chunk ``i`` is stored in ``temp_path + basename + str(i)``; a
    ``basename + ".dat"`` file next to the chunks records the number of
    chunks and the URL so an interrupted download can be resumed.
    """

    numthreadsrunning = 0
    nbufsize = 8192     # bytes per socket read
    fbufsize = 8192     # bytes per file read while building the output
    debug = 1
    finished_downloading = 0
    # "Chunks" is a file name prefix inside TMPDIR, not a directory.
    temp_path = "%s%sChunks" % (TMPDIR, os.sep)
    resume = 1

    def __init__(self, url, outputfilename, numchunks=None):
        """Initialize the downloader

        Parses the URL and sends a HEAD request to check that the server
        supports partial downloads and to learn the file size.  The
        download itself is started with start_download().

        Raises HTTPException when the URL cannot be parsed, the server
        reports an error, ranges are unsupported or the size is unknown.
        """
        self.read = 0
        self.read_total = 0
        self.warned_for_different_url = 0
        self.outputfilename = outputfilename
        # Set here as well so the speed getter works before the download
        # thread has had a chance to run.
        self.starttime = int(time.time())

        if not numchunks:
            numchunks = 6
        self.numchunks = int(numchunks)

        # Parse the url
        (self.host, self.port, self.filename) = self.parse_url(url)

        # Test the connection for partial download-capability and get filesize
        filesize = self.get_info(self.host, self.port, self.filename)
        self.filesize = filesize
        if filesize == 0:
            raise HTTPException("Content-Length unknown.")

    # -- bookkeeping helpers ------------------------------------------------

    def _chunk_base(self):
        """Return the common path prefix of all temporary files."""
        return self.temp_path + os.path.basename(self.filename)

    def _datfile(self):
        """Return the path of the resume-information file."""
        return self._chunk_base() + ".dat"

    def _current_url(self):
        """Return the URL of this download as stored in the .dat file."""
        return "http://%s:%s/%s" % (self.host, str(self.port), self.filename)

    def _ensure_temp_dir(self):
        """Create the directory holding the temporary files if missing."""
        directory = os.path.dirname(self.temp_path)
        if directory and not os.path.isdir(directory):
            os.makedirs(directory)

    def _write_datfile(self):
        """Record the current number of chunks and URL for later resuming."""
        self._ensure_temp_dir()
        fp = open(self._datfile(), 'w')
        try:
            fp.write('%s\n%s' % (str(self.numchunks), self._current_url()))
        finally:
            fp.close()

    def _read_datfile(self):
        """Return ``(numchunks, url)`` as stored by a previous download."""
        fp = open(self._datfile(), 'r')
        try:
            (numchunks, url) = fp.readlines()
        finally:
            fp.close()
        return (int(numchunks), url.strip())

    def _plan_chunks(self):
        """Return a list of ``(start, size)`` tuples, one per chunk.

        The first ``numchunks - 1`` chunks have equal size and the last
        one takes whatever is left; the sizes always add up to filesize.
        """
        numchunks = self.numchunks
        filesize = self.filesize
        if numchunks == 1:
            return [(0, filesize)]
        chunksize = filesize // (numchunks - 1)
        lastchunksize = filesize % (numchunks - 1)
        if not lastchunksize:
            # An even split would leave the last chunk empty, so divide
            # by the full number of chunks instead.
            chunksize = filesize // numchunks
            lastchunksize = chunksize + (filesize % numchunks)
        plan = [(i * chunksize, chunksize) for i in range(numchunks - 1)]
        plan.append(((numchunks - 1) * chunksize, lastchunksize))
        return plan

    # -- resume handling ----------------------------------------------------

    def reset_to_new(self):
        """Forget a previous partial download and start from scratch."""
        self.resume = 0
        datfile = self._datfile()
        if os.path.isfile(datfile):
            (oldnumchunks, oldurl) = self._read_datfile()
            os.unlink(datfile)
            for i in range(oldnumchunks):
                chunkfile = self._chunk_base() + str(i)
                # Not every chunk of the old download need exist yet.
                if os.path.isfile(chunkfile):
                    os.unlink(chunkfile)
        self._write_datfile()

    def test_resume(self):
        """Check whether a previous partial download can be continued.

        Writes a fresh .dat file when there is none.  Otherwise raises
        NumchunkWarning when the stored number of chunks differs from the
        current one, and UrlWarning (once per instance) when the stored
        URL differs from the current one.
        """
        if not os.path.isfile(self._datfile()):
            self._write_datfile()
            return
        (oldnumchunks, oldurl) = self._read_datfile()
        if oldnumchunks != int(self.numchunks):
            raise NumchunkWarning("Error: old number of chunks "
                                  "differs from new number of "
                                  "chunks!")
        if oldurl != self._current_url() and \
                self.warned_for_different_url == 0:
            # Remember the warning *before* raising so that a second call
            # passes.
            self.warned_for_different_url = 1
            raise UrlWarning("Warning: URL from old download differs "
                             "from URL from new download!")

    def adjust_numchunks(self):
        """Adopt the number of chunks of the previous partial download."""
        (numchunks, url) = self._read_datfile()
        self.numchunks = numchunks

    # -- downloading --------------------------------------------------------

    def start_download(self):
        """Start one thread per chunk and block until all have finished.

        Sets ``finished_downloading`` to 1 when the last thread is done.
        Raises SystemError when the number of chunks is below 1.
        """
        self.starttime = int(time.time())
        numchunks = self.numchunks
        if numchunks < 1:
            raise SystemError(
                "Error: illegal number of chunks (%s)" % numchunks)
        self._ensure_temp_dir()
        # Set the counter before the first thread starts so a fast thread
        # cannot bring it to zero prematurely.
        self.numthreadsrunning = numchunks
        index = 0
        for (start, size) in self._plan_chunks():
            thread.start_new_thread(self.get_chunk,
                                    (index, self.host, self.port,
                                     self.filename, start, size))
            index += 1
        while self.numthreadsrunning > 0:
            time.sleep(0.1)
        self.finished_downloading = 1

    def build_output(self):
        """Concatenate all chunk files into the output file.

        The chunk files and the .dat file are removed afterwards.
        """
        base = self._chunk_base()
        outputfile = open(self.outputfilename, 'wb')
        try:
            for i in range(self.numchunks):
                chunkfile = base + str(i)
                fp = open(chunkfile, 'rb')
                try:
                    while 1:
                        chunk = fp.read(self.fbufsize)
                        if not chunk:
                            break
                        outputfile.write(chunk)
                finally:
                    fp.close()
                os.unlink(chunkfile)
        finally:
            outputfile.close()
        os.unlink(base + ".dat")

    def parse_url(self, url):
        """Split *url* into ``(host, port, filename)``.

        The scheme is optional and ignored, the port defaults to 80 and
        the filename is the path without its leading slash.  Raises
        HTTPException when *url* contains no path at all.
        """
        match = _URL_RE.search(url)
        if not match:
            raise HTTPException("URL in wrong format!")
        host = match.group(1)
        port = 80
        if match.group(2):
            port = int(match.group(2))
        filename = match.group(3)
        return (host, port, filename)

    def get_info(self, host, port, filename):
        """Get info about the host

        Sends a HEAD request and returns the Content-Length of the file,
        or 0 when the server does not report one.  Raises HTTPException
        when the server does not answer "200 OK" or does not announce
        support for byte ranges.
        """
        # Build request
        testrequest = (
            "HEAD /%s HTTP/1.0\r\n"
            "Host: %s\r\n"
            "User-Agent: Johnny deBris' Chunked Downloader\r\n"
            "Connection: close\r\n"
            "Accept: */*\r\n\r\n") % (filename, host)
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((host, int(port)))
            s.sendall(_to_wire(testrequest))
            response = _read_headers(s)
        finally:
            s.close()

        if response.find('200 OK') == -1:
            _raise_http_error(response)

        if self.debug:
            # Debugging aid: keep the last response head in ./output
            fp = open("output", "w")
            try:
                fp.write(response)
            finally:
                fp.close()

        if not _ACCEPT_RANGES_RE.search(response):
            raise HTTPException("Partial downloading not supported!")
        match = _CONTENT_LENGTH_RE.search(response)
        if not match:
            return 0
        return int(match.group(1))

    def get_chunk(self, id, host, port, filename, start, size):
        """Download chunk *id*: *size* bytes beginning at offset *start*.

        Meant to run in its own thread.  ``numthreadsrunning`` is
        decremented exactly once when the method ends, also on errors, so
        start_download() never waits for a thread that has died.
        Raises HTTPException on an unusable response or a short read.
        """
        try:
            self._fetch_chunk(id, host, port, filename, start, size)
        finally:
            self.numthreadsrunning -= 1

    def _fetch_chunk(self, id, host, port, filename, start, size):
        """Do the actual work of get_chunk()."""
        chunkfile = self.temp_path + os.path.basename(filename) + str(id)

        # If the file already exists, continue instead of starting from
        # scratch.  Without resuming, stale data must be overwritten.
        mode = "wb"
        if os.path.isfile(chunkfile) and self.resume:
            fsize = os.path.getsize(chunkfile)
            start = start + fsize
            size = size - fsize
            self.read_total += fsize
            mode = "ab"

        if size <= 0:
            # Nothing left to fetch; just make sure the chunk file exists
            # for build_output().
            open(chunkfile, mode).close()
            return

        request = ("GET /%s HTTP/1.0\r\n"
                   "Host: %s\r\n"
                   "Range: bytes=%s-%s\r\n"
                   "User-Agent: Johnny deBris' Chunked Downloader v0.1\r\n"
                   "Connection: close\r\n"
                   "Accept: */*\r\n\r\n") % (
                       filename, host, start, start + size - 1)

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((host, int(port)))
            s.sendall(_to_wire(request))
            response = _read_headers(s)

            if (response.find('206 Partial Content') == -1 and
                    response.find('200 OK') == -1):
                _raise_http_error(response)
            # A differing Content-Length means the server did not return
            # the requested range (e.g. it sent the whole file).
            match = _CONTENT_LENGTH_RE.search(response)
            if match and int(match.group(1)) != size:
                raise HTTPException(
                    "Unexpected Content-Length for chunk %s: got %s, "
                    "expected %s" % (id, match.group(1), size))

            # Now download the chunk
            fp = open(chunkfile, mode)
            try:
                read = 0
                self.last_percentage_shown = 0
                while read < size:
                    data = s.recv(min(self.nbufsize, size - read))
                    if not data:
                        raise HTTPException(
                            "Connection closed before chunk %s was "
                            "complete" % id)
                    fp.write(data)
                    read += len(data)
                    self.read += len(data)
                    self.read_total += len(data)
            finally:
                fp.close()
        finally:
            s.close()

    # -- progress information -----------------------------------------------

    def get_percentage_loaded(self):
        """Return the completed part of the file as a whole percentage."""
        return int(float(self.read_total) / float(self.filesize) * 100)

    def get_bytes_loaded(self):
        """Return the number of bytes present, resumed data included."""
        return int(self.read_total)

    def get_bytes_total(self):
        """Return the size of the file in bytes."""
        return int(self.filesize)

    def get_average_speed(self):
        """Return the bytes per second received since the download began."""
        elapsed = time.time() - self.starttime
        if elapsed <= 0:
            return 0
        return int(self.read / elapsed)

    def get_numchunks(self):
        """Return the number of chunks in use."""
        return int(self.numchunks)


class Chunkie:
    """Tkinter front end for chunked_http_loader."""

    def __init__(self, url=""):
        """Build the window, ask for an output file and run the main loop."""
        root = Tk()
        root.title("Chunkie")
        self.root = root

        self.urlstring = StringVar()
        self.outputstring = StringVar()
        self.numthreadint = IntVar()
        self.totalbytesint = IntVar()
        self.numbytesint = IntVar()
        self.percentint = IntVar()
        self.speedint = IntVar()
        self.eltimeint = IntVar()
        self.esttimeint = IntVar()
        self.tottimeint = IntVar()

        self.urlstring.set(url)
        self.numthreadint.set(10)
        for counter in (self.totalbytesint, self.numbytesint,
                        self.percentint, self.speedint, self.eltimeint,
                        self.esttimeint, self.tottimeint):
            counter.set(0)

        rows = (
            ("URL to download:", self.urlstring),
            ("Path for output:", self.outputstring),
            ("Number of threads:", self.numthreadint),
            ("Total number of bytes:", self.totalbytesint),
            ("Number of bytes loaded:", self.numbytesint),
            ("Percentage loaded:", self.percentint),
            ("Average speed:", self.speedint),
            ("Time elapsed:", self.eltimeint),
            ("Estimated time left:", self.esttimeint),
            ("Estimated total time:", self.tottimeint),
        )
        row = 0
        for (caption, variable) in rows:
            self._add_row(row, caption, variable)
            row += 1

        btnfrm = Frame(root)
        btnfrm.grid(row=row, column=0, columnspan=2)
        btnstart = Button(btnfrm, text="Start", command=self.start)
        btnstart.grid(row=0, column=0)
        btnbrowse = Button(btnfrm, text="Browse",
                           command=self.set_outputpath)
        btnbrowse.grid(row=0, column=1)
        btnquit = Button(btnfrm, text="Quit", command=self.end)
        btnquit.grid(row=0, column=2)

        # An output path is required, so keep asking until one is chosen.
        filename = os.path.basename(url)
        while self.outputstring.get() == "":
            self.outputstring.set(asksaveasfilename(initialfile=filename))

        root.mainloop()

    def _add_row(self, row, caption, variable):
        """Put a label and an entry bound to *variable* on grid row *row*."""
        Label(self.root, text=caption).grid(row=row, column=0)
        Entry(self.root, textvariable=variable).grid(row=row, column=1)

    def set_outputpath(self):
        """Let the user pick another output file; cancelling keeps the old."""
        newname = asksaveasfilename()
        if newname:
            self.outputstring.set(newname)

    def start(self):
        """Create the downloader, settle resume questions and start it."""
        try:
            self.chl = chunked_http_loader(self.urlstring.get(),
                                           self.outputstring.get(),
                                           self.numthreadint.get())
        except Exception:
            showerror("Error initializing download",
                      str(sys.exc_info()[1]))
            self.end()
            # Without a downloader there is nothing left to do.
            return

        while 1:
            try:
                self.chl.test_resume()
                break
            except NumchunkWarning:
                answer = askyesno(
                    "Warning",
                    ("The number of chunks you chose differs from "
                     "the number of chunks in the previous "
                     "download. Choose 'yes' to adjust the number "
                     "of chunks and continue the previous "
                     "download, 'no' to start from the beginning "
                     "with the current settings"))
                if answer:
                    self.chl.adjust_numchunks()
                    self.numthreadint.set(self.chl.get_numchunks())
                    # Check again: the URL may differ as well.
                    continue
                self.chl.reset_to_new()
                break
            except UrlWarning:
                if not askyesno(
                        "Warning",
                        ("The URL from the old download is different than the "
                         "current URL. Choose 'yes' to continue the previous "
                         "download and 'no' to start a new download.")):
                    self.chl.reset_to_new()
                break

        thread.start_new_thread(self.chl.start_download, ())
        self.totalbytesint.set(self.chl.get_bytes_total())
        self.root.after(100, self.update)

    def update(self):
        """Refresh the progress fields; reschedules itself until done."""
        bytes_total = self.chl.get_bytes_total()
        bytes_loaded = self.chl.get_bytes_loaded()
        percentage_loaded = self.chl.get_percentage_loaded()
        # Never 0, it is used as a divisor below.
        average_speed = self.chl.get_average_speed() or 1

        self.percentint.set(percentage_loaded)
        self.numbytesint.set(bytes_loaded)
        self.speedint.set(average_speed)
        self.root.title('%s - %s%%' % (
            os.path.basename(self.chl.filename), percentage_loaded))

        time_elapsed = time.time() - self.chl.starttime
        total_time = bytes_total // average_speed
        self.eltimeint.set(int(time_elapsed))
        self.esttimeint.set(int(total_time - time_elapsed) + 1)
        self.tottimeint.set(total_time)

        if not self.chl.finished_downloading:
            self.root.after(100, self.update)
        else:
            self.start_building()
            showwarning("Finished",
                        "Download finished in %s seconds"
                        % int(time_elapsed))
            self.end()

    def start_building(self):
        """Assemble the output file from the downloaded chunks."""
        self.root.title("Building output...")
        self.chl.build_output()

    def end(self):
        """Close the window, which ends the main loop."""
        self.root.destroy()


class ActiveChunkie:
    """COM server wrapper around Chunkie (see register_com)."""

    _reg_clsid_ = "[__CLASSID__]"
    _reg_desc_ = "Chunkie Downloader"
    _reg_progid_ = "Python.ActiveChunkie"
    _public_methods_ = ['start_download']

    def start_download(self, url):
        """Run the GUI for *url*; return "Quitted" if it ends with an error."""
        try:
            Chunkie(str(url))
        except (Exception, SystemExit):
            # Nothing may escape into the COM host.
            return "Quitted"


def register_com():
    """Register ActiveChunkie as a COM server (needs the win32com package)."""
    import win32com.server.register
    win32com.server.register.UseCommandLine(ActiveChunkie)


if __name__ == '__main__':
    if len(sys.argv) > 1 and sys.argv[1] == 'register':
        register_com()
    else:
        try:
            url = "http://www.python.org/ftp/python/2.2.1/Python-2.2.1.exe"
            if len(sys.argv) > 1:
                url = sys.argv[1]
            c = Chunkie(url)
        except SystemExit:
            pass
        except Exception:
            print(str(sys.exc_info()[0]) + ": " + str(sys.exc_info()[1]))