#!/usr/bin/env python

import sys
import os
#import commandsplus
#import re
import email.Parser
import pygtk
pygtk.require("2.0")
import gtk
import gtk.glade
from threading import Thread,currentThread
import signal
import gobject

#if len(sys.argv) < 2:
	#sys.stderr.write("usage: remove-mh-duplicates <mh mail folder>\n")
	#sys.exit(os.EX_USAGE)

#folder = sys.argv[1]

#regex = re.compile("^Message-ID: (.*)$",re.M|re.I)

class Bucket:
	"""Group file names by the message id they were added under.

	bucket_map maps each message id to the list of file names registered
	for it, in the order they were added.
	"""

	def __init__(self):
		self.bucket_map = {}

	def add(self,messageid,filename):
		"""Register filename under messageid, creating the entry on first use."""
		# setdefault replaces the has_key() test, which no longer exists in Python 3
		self.bucket_map.setdefault(messageid,[]).append(filename)

	def __get_matching(self,eval_function):
		"""Return a new dict holding the entries whose file list satisfies eval_function.

		The lists in the returned dict are the same objects stored in
		bucket_map, not copies.
		"""
		return dict(
			(mid,filenames)
			for mid,filenames in self.bucket_map.items()
			if eval_function(filenames)
		)

	def get_dupes(self):
		"""Return {message id: file names} for ids registered more than once."""
		return self.__get_matching(lambda filenames: len(filenames) > 1)

	def get_empty(self):
		"""Return {message id: []} for ids whose file list is empty.

		add() always appends, so this only matches entries that were put
		into bucket_map directly.
		"""
		return self.__get_matching(lambda filenames: len(filenames) == 0)

class NotAMessage(Exception):
	"""Error type; presumably for a file that is not a mail message (inferred from the name).

	Nothing in the code shown here raises or catches it.
	"""

def locate_dupes(directory,progress_function,continuation_function):
	"""Scan the files directly inside directory for repeated Message-ID headers.

	Only the top level of directory is read; subdirectories are not
	descended into.  Nothing is returned: all reporting goes through the two
	callbacks, so the function can be used as a thread target.

	progress_function(fraction, filename) is called before each file is
	parsed, with the share of files already handled (0.0 <= fraction < 1.0)
	and the bare name of the file about to be read.

	continuation_function(result) is called once at the end, either with a
	dict mapping each duplicated Message-ID to the list of file names that
	carry it, or with the exception instance that aborted the scan.
	"""
	# email.parser is the current name of the module this file imports as email.Parser
	from email.parser import HeaderParser

	def abort_walk(error):
		# os.walk ignores listing errors unless the onerror hook re-raises them
		raise error

	try:
		dirpath,dirnames,filenames = next(os.walk(directory,topdown=True,onerror=abort_walk))
	except Exception as e:
		# a missing or unreadable directory must reach the caller as well,
		# otherwise whoever waits for the continuation waits forever
		continuation_function(e)
		return

	bucket = Bucket()
	total = float(len(filenames))
	for current,filename in enumerate(filenames):
		try:
			progress_function(current/total,filename)
			# only the headers are parsed; the with block closes the file
			# right away so a large folder does not exhaust file handles
			with open(os.path.join(directory,filename)) as message_file:
				headers = HeaderParser().parse(message_file)
			mid = headers["Message-ID"]
			if mid is None:
				print("Warning: message %s has no Message-ID or could not be parsed"%filename)
			else:
				bucket.add(mid,filename)
		except Exception as e:
			# first failure aborts the whole scan
			continuation_function(e)
			return
	continuation_function(bucket.get_dupes())


def get_shared_path():
	"""Return the absolute path of the directory that holds the shared data files.

	Candidates are tried in order: the current working directory, then
	../share/remove-mh-duplicates relative to the directory of sys.argv[0].
	The first candidate containing a file named 'version' wins.

	Raises Exception when no candidate contains that file.
	"""
	testfile = 'version'
	sharedirs = [".",os.path.join(os.path.dirname(sys.argv[0]),"../share/remove-mh-duplicates")]
	for sharedir in sharedirs:
		candidate = os.path.abspath(sharedir)
		if os.path.exists(os.path.join(candidate,testfile)):
			return candidate

	# call form of raise: the "raise Exception, msg" statement is Python-2-only
	raise Exception("Remove MH duplicates shared files " + testfile + " cannot be found in any of " + str(sharedirs) + " default paths")


class RemoveGUI (gtk.glade.XML):
	"""Window that scans a folder for duplicate messages and deletes the extras.

	The widget tree is loaded from remove-mh-duplicates.glade.  Scanning runs
	on a worker thread (locate_dupes); its callbacks hand themselves back to
	the thread that built the GUI before touching any widget.

	State attributes:
	  scanning_folder - folder of the scan in progress, else None
	  scanned_folder  - folder that scan_results belongs to, else None
	  scan_results    - dict {message id: [file names]} of the last
	                    successful scan, else None
	"""
	application = None

	def __init__ (self,sharepath):
		"""Load the glade file found in sharepath, set up the list columns and show the window."""
		# remembered so callbacks can tell whether they run on the GUI thread
		self.threadcontext = currentThread()
		self.sharepath = sharepath
		gtk.glade.XML.__init__(self,os.path.join(self.sharepath,'remove-mh-duplicates.glade'))
		# handlers named in the glade file are looked up as attributes of this
		# object (presumably folder_chosen, remove_duplicates and quit)
		self.signal_autoconnect(self)
		# expose the widgets used below as plain attributes
		for w in ["application","folder_chooser","scan_progress","message_list","execute","close"]:
			setattr(self,w,self.get_widget(w))

		self.scanning_folder = None
		self.scanned_folder = None
		self.scan_results = None

		# column order matches the (file name, message id) rows built in populate_message_list
		col_name = gtk.TreeViewColumn("File", gtk.CellRendererText(),text=0)
		col_status = gtk.TreeViewColumn("Message ID", gtk.CellRendererText(),text=1)
		self.message_list.insert_column(col_name, 0)
		self.message_list.insert_column(col_status, 1)

		self.application.show()

	def w(self,name):
		"""Return the widget called name from the glade tree."""
		# get_widget lives on the glade XML object (self), as used in __init__,
		# not on the top-level window stored in self.application
		return self.get_widget(name)

	def folder_chosen(self,w):
		"""Handler: start scanning the folder currently selected in the chooser."""
		filename = self.folder_chooser.get_filename()
		if filename is None:
			# nothing selected, so there is nothing to scan
			return
		self.scan_folder(filename)

	#def set_display_folder(self,filename):
		#self.folder_chooser.handler_block(self.folder_chosen)
		#self.folder_chooser.set_filename(filename)

	def scan_folder(self,folder):
		"""Start a background scan of folder and lock the controls until it ends."""
		self.scanning_folder = folder
		self.scanned_folder = None
		self.scan_results = None
		self.execute.set_sensitive(False)
		self.folder_chooser.set_sensitive(False)
		t = Thread(target=locate_dupes,args=(folder,self.update_progress,self.scan_finished))
		# daemon thread: a running scan does not keep the process alive
		t.setDaemon(True)
		t.start()
		self.scan_progress.show()

	def _defer_to_gui_thread(self,callback,*args):
		"""Queue callback(*args) as an idle handler when called off the GUI thread.

		Returns True when the call was queued (the caller must then return
		without touching widgets) and False when already on the GUI thread.
		"""
		if currentThread() == self.threadcontext:
			return False
		gtk.threads_enter()
		try:
			gobject.idle_add(callback,*args)
		finally:
			# release the lock even if queuing fails
			gtk.threads_leave()
		return True

	def update_progress(self,percentage,current_object):
		"""Show scan progress; percentage is a fraction (0.0-1.0), current_object a file name."""
		if self._defer_to_gui_thread(self.update_progress,percentage,current_object):
			return
		self.scan_progress.set_property("fraction",percentage)
		self.scan_progress.set_text("Scanning file %s"%current_object)

	def scan_finished(self,result):
		"""Finish a scan; result is the duplicates dict or the exception that aborted it."""
		if self._defer_to_gui_thread(self.scan_finished,result):
			return
		self.folder_chooser.set_sensitive(True)
		if isinstance(result,Exception):
			self.scan_progress.set_text("Error: %s: %s"%(str(result.__class__),str(result)))
			# no scan is running any more and there is nothing to act on
			self.scanning_folder = None
			self.scanned_folder = None
			self.scan_results = None
		else:
			self.scan_progress.set_text("Scanning finished")
			self.scanned_folder = self.scanning_folder
			self.scanning_folder = None
			self.scan_results = result
			self.populate_message_list(result)
			# removal is only offered when duplicates were found
			self.execute.set_sensitive(bool(result))

	def populate_message_list(self,result):
		"""Replace the list contents with one (file name, message id) row per duplicate file."""
		store = gtk.ListStore(str,str)
		self.message_list.set_model(store)
		for key,values in result.items():
			for value in values:
				store.append((value,key))

	def remove_duplicates(self,w=None):
		"""Handler: delete every file but the first one listed for each duplicated message id.

		The kept file is simply the first in each list; that order comes from
		the directory listing done by the scan.  Deletion stops at the first
		file that cannot be removed and the error is shown in the progress bar.
		"""
		results = self.scan_results
		folder = self.scanned_folder
		if not results or folder is None:
			# no completed scan with duplicates to act on
			return
		# empty the list view
		self.message_list.set_model(gtk.ListStore(str,str))
		self.execute.set_sensitive(False)
		# the scan results are stale as soon as the first file is gone
		self.scanned_folder = None
		self.scanning_folder = None
		self.scan_results = None
		try:
			for key,values in results.items():
				for value in values[1:]:
					self.scan_progress.set_text("Removing %s..."%value)
					os.unlink(os.path.join(folder,value))
		except OSError as e:
			self.scan_progress.set_text("Error: %s: %s"%(str(e.__class__),str(e)))
			return
		self.scan_progress.set_text("Duplicates removed")

	def quit(self,w=None):
		"""Handler: leave the GTK main loop."""
		gtk.main_quit()


# Script entry: build the window, enable GTK threading support, then run the main loop.
app = RemoveGUI(get_shared_path())
gtk.threads_init()
# An optional first command-line argument is a folder to scan once the main loop is running.
if sys.argv[1:]:
	gobject.idle_add(app.scan_folder,sys.argv[1])
gtk.main()

#print "dupes:"
#print bucket.get_dupes()
#print ""
#print "empty buckets"
#print bucket.get_empty()
