import re


class return_visitors:
    """Count the IP addresses that were seen on more than ``n`` distinct days.

    Usage protocol: call ``begin()``, feed every log line to
    ``process_line()``, call ``end()``, then read ``description()`` and
    ``result()``.

    Each line is split on whitespace; field 0 is used as the IP address and
    characters 1-6 of field 3 as the day.
    NOTE(review): this presumably targets Apache/NCSA access logs, where
    field 3 looks like ``[10/Oct/2000:13:55:36`` -- confirm with the caller.
    """

    def __init__(self, n):
        """Create an analyser with threshold ``n`` (a number of days)."""
        self.m_n = n
        # Maps ip -> list of distinct day strings, in first-seen order.
        self.m_ip_days = {}
        # Initialised here so result() is safe to call before end().
        self.m_count = 0

    def begin(self):
        """Hook called before the first line; nothing to prepare."""
        pass

    def process_line(self, s):
        """Record the day on which the IP address of log line ``s`` was seen.

        Lines with fewer than four whitespace-separated fields are ignored.
        """
        fields = s.split()
        if len(fields) < 4:
            return
        ip = fields[0]
        # Skips the first character of the field and keeps the next six,
        # e.g. "[10/Oct/2000:..." -> "10/Oct".
        # NOTE(review): the year is not part of the key, so the same calendar
        # day in different years counts once -- confirm this is intended.
        day = fields[3][1:7]
        days = self.m_ip_days.setdefault(ip, [])
        if day not in days:
            days.append(day)

    def end(self):
        """Compute how many IP addresses were seen on more than ``n`` days."""
        self.m_count = sum(
            1 for days in self.m_ip_days.values() if len(days) > self.m_n
        )

    def description(self):
        """Return a one-line, human-readable title for the result."""
        return "# of IP addresses that visited more than %s days" % self.m_n

    def result(self):
        """Return the count computed by ``end()`` (0 before ``end()`` ran)."""
        return self.m_count


class referring_domains:
    """Tally how often each referring domain occurs in the log lines.

    Usage protocol: call ``begin()``, feed every log line to
    ``process_line()``, call ``end()``, then read ``description()`` and
    ``result()``.

    Field 10 of each whitespace-split line is treated as the referrer.
    """

    # Matches "//<host>.<tld>/" and captures "<host>.<tld>".  The TLD class
    # is [a-zA-Z]; the former "A-z" range also matched "[", "\\", "]", "^",
    # "_" and "`".
    # NOTE(review): {2,3} skips longer TLDs such as ".info" -- confirm
    # whether that restriction is still wanted.
    _domain_re = re.compile(r'//([a-zA-Z0-9\-.]*\.[a-zA-Z]{2,3})/')

    def __init__(self):
        """Create an analyser with an empty tally."""
        # Maps domain -> number of lines that referred from it.
        self.m_domains = {}

    def begin(self):
        """Hook called before the first line; nothing to prepare."""
        pass

    def process_line(self, line):
        """Count the referring domain of ``line``, if it has one.

        Lines with fewer than eleven whitespace-separated fields, or whose
        referrer field does not contain a recognisable domain, are ignored.
        """
        fields = line.split()
        if len(fields) < 11:
            return
        match = self._domain_re.search(fields[10])
        if match is None:
            return
        domain = match.group(1)
        self.m_domains[domain] = self.m_domains.get(domain, 0) + 1

    def end(self):
        """Hook called after the last line; nothing to finalise."""
        pass

    def description(self):
        """Return a one-line, human-readable title for the result."""
        return "Referring domains"

    def sort(self, key1, key2):
        """Compare two domains so that higher counts sort first.

        Returns -1, 0 or 1 in the style of a Python 2 ``cmp`` function.
        ``result()`` no longer uses it; it is kept for existing callers.
        """
        count1 = self.m_domains[key1]
        count2 = self.m_domains[key2]
        if count1 > count2:
            return -1
        if count1 == count2:
            return 0
        return 1

    def result(self):
        """Return the tally as text, one "<domain> <count>" line per domain.

        Domains are ordered by descending count; the text ends with two
        extra newlines.
        """
        # sorted() is stable even with reverse=True, so domains with equal
        # counts keep the dictionary's iteration order.
        ordered = sorted(self.m_domains, key=self.m_domains.get, reverse=True)
        lines = ["%s %s\n" % (domain, self.m_domains[domain]) for domain in ordered]
        return "".join(lines) + "\n\n"