#!/usr/bin/env python3
# Spider to fetch files from Apache-style directory listings.
import argparse
import os
import re
import subprocess
import sys

parser = argparse.ArgumentParser(
    description="Spider to get files from Apache directory listings")
parser.add_argument("webpage", metavar="URL", help="URL of directory listing")
parser.add_argument("-v", help="verbose", action="store_true")
parser.add_argument("-c", help="count files to download, but don't download",
                    action="store_true")
parser.add_argument("-d", help="directory to store the result in", default=".")
parser.add_argument("-r", help="recursive download including subdirectories",
                    action="store_true")
args = parser.parse_args()

# Directory listings link to their entries with relative hrefs; make sure
# the base URL ends in "/" so the URL + filename concatenation below works.
base_url = args.webpage if args.webpage.endswith("/") else args.webpage + "/"

# Fetch the listing with wget and extract the relative hrefs. Absolute
# hrefs (parent directory, sort-order links) are skipped, as are entries
# containing characters outside the allowed class.
page = subprocess.run(["wget", "-q", "-O", "-", base_url],
                      capture_output=True, text=True).stdout
file_list = [href for href in re.findall(r'href="([A-Za-z0-9._/-]+)"', page)
             if not href.startswith("/")]

if args.c:
    print(len(file_list))
    sys.exit(0)
if not file_list:
    print("No files found for URL", base_url)
    sys.exit(1)

os.makedirs(args.d, exist_ok=True)
# Remember our own absolute path before chdir() so the recursive
# re-invocation below can still find this script.
our_name = os.path.abspath(sys.argv[0])
os.chdir(args.d)

for filename in file_list:
    if args.v:
        print(filename)
    if filename.endswith("/"):
        # Directory entry: recurse by re-running this script on it.
        if args.r:
            subprocess.run([our_name, "-r", "-d", filename,
                            base_url + filename])
    else:
        subprocess.run(["wget", "-q", base_url + filename])
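
# Example usage (the URL and script name are hypothetical; any server that
# serves an Apache-style index page should work):
#
#   ./spider.py -v -d mirror -r http://example.com/files/
#
# Count the files that would be downloaded, without fetching anything:
#
#   ./spider.py -c http://example.com/files/
#
# Design note: wget and the recursive call are invoked with argv lists
# rather than interpolated shell strings, so a URL or filename containing
# shell metacharacters cannot inject into the command line.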