#  2015. May 4th 
   #  Author: Jin, Yilong   jin28@vt.edu
   #
   # This program is free software: you can redistribute it and/or modify
   #  it under the terms of the GNU General Public License as published by
   #  the Free Software Foundation, either version 3 of the License, or
   #  (at your option) any later version.

   #  This program is distributed in the hope that it will be useful,
   #  but WITHOUT ANY WARRANTY; without even the implied warranty of
   #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   #  GNU General Public License for more details.

   #  You should have received a copy of the GNU General Public License
   #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
   

from basic_classes import mention 
import time
import json, codecs, operator
import avro
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
import sys
from multiprocessing import Process, Manager, Value

#############  constants used to calculate tweet importance ###################
# Final scaling applied in calculate_tweet_importance:
#   importance = sum(terms) / NUM_OF_TERMS * TERM_WEIGHT_CONSTANT
TERM_WEIGHT_CONSTANT = 0.2
NUM_OF_TERMS = 5

# Min/max bounds used to scale each raw count as
#   (value - MIN) / (MAX - MIN) + 1
# NOTE(review): the values below are only initial defaults; FOLLOW and RT start
# with MAX == MIN, so they are presumably overwritten elsewhere before the
# importance calculation runs -- confirm against the loading code.
CONST_MIN_FOLLOW = 0
CONST_MAX_FOLLOW = 0

CONST_MIN_FAV = 0
CONST_MAX_FAV = 1

CONST_MIN_LIST = 0
CONST_MAX_LIST = 1

CONST_MIN_RT = 0
CONST_MAX_RT = 0
# NOTE(review): not referenced in this part of the file; presumably holds the
# elapsed run time of the script -- confirm.
SCRIPT_RUN_TIME = 0.0

############################################################################# 

# tweet id -> dict of tweet metadata; the keys read in this file are
# 'content', 'user_screen_name', 'fav_count' and 'retweet_count'
TWEETS = dict()

# user name -> user importance value (a float, written by UIV_helper)
UIV = dict()

# tweet id -> {'doc_id': tweet id, 'importance': score}.
# TIV is a multiprocessing.Manager dictionary so that it can be filled by
# worker processes; note that the Manager is created at import time.
m = Manager()
TIV = m.dict()

# EDGE[u1][u2] holds the mention weight stored by update_EDGE for the pair
# (u1, u2)
EDGE = dict()

# user screen name -> dict with 'follower_count' and 'list_count'
USER = dict()


# user handle -> user object, e.g. "@me_at_vt" : 0x32fdsaffdsa (a reference to
# the object); used for "is this user in the collection" membership tests
username_userstruct = dict()

def showTop10():
    """Print a short report for the ten tweets with the highest importance.

    Reads the module-level ``TIV`` (tweet id -> ``{'doc_id', 'importance'}``),
    ``TWEETS``, ``UIV`` and ``USER`` dictionaries plus the ``mention``
    statistics, and writes the report to stdout.  Returns nothing.

    The score and tweet id are looked up by key.  Indexing ``dict.values()``
    by position, as this function used to do, depends on the arbitrary
    ordering of dictionary entries and can silently swap the two fields.
    """
    # Rank by score; the tweet id only breaks ties so the order is stable.
    ranked = sorted(
        TIV.values(),
        key=lambda entry: (entry['importance'], entry['doc_id']),
        reverse=True,
    )

    for entry in ranked[:10]:
        t_id = entry['doc_id']
        tweet_meta = TWEETS.get(t_id)
        user_name = tweet_meta.get('user_screen_name')
        fav_count = tweet_meta.get('fav_count')
        retweet_count = tweet_meta.get('retweet_count')
        total_mention = mention.get_total_mentions_by_users(user_name)
        user_stats = USER.get(user_name)

        print(tweet_meta.get('content'))
        print(user_name)
        print('score %f' % entry['importance'])
        print('UIV: %r' % UIV.get(user_name))
        print('# of followers: %d' % user_stats.get('follower_count'))
        print('list count: %d' % user_stats.get('list_count'))
        print('mentioned by users in the collection %d times' % total_mention)
        print('retweet count: %d' % retweet_count)
        print('fav_count: %d' % fav_count)
        print('----------------------\n')


def output_tweet_importance(output_name, output_schema_path):
    """Write every tweet-importance record in ``TIV`` to an Avro data file.

    Args:
        output_name: path of the Avro container file to create.
        output_schema_path: path of the Avro schema (JSON text) describing
            the records stored in ``TIV``.

    Returns:
        The number of records appended to the file.
    """
    # Close the schema file as soon as it has been parsed.
    with open(output_schema_path) as schema_file:
        output_schema = avro.schema.parse(schema_file.read())

    # Snapshot the shared dictionary once so the total and the records agree.
    records = TIV.values()
    total = len(records)
    written = 0

    # Avro container files are binary, hence "wb" rather than "w".
    output_avro_writer = DataFileWriter(
        open(output_name, "wb"), DatumWriter(), output_schema)
    try:
        for record in records:
            written += 1
            sys.stdout.write('\rwriting %d/%d output' % (written, total))
            sys.stdout.flush()
            output_avro_writer.append(record)
    finally:
        # Flushes pending blocks and closes the underlying file, even when
        # appending a record fails.
        output_avro_writer.close()
    return written


def _scaled_term(value, low, high):
    """Return ``(value - low) / (high - low) + 1`` as a float.

    When ``high == low`` the range is empty and the fraction is undefined;
    the term is then reported as 1.0 (a fraction of zero) instead of raising
    ``ZeroDivisionError``.
    """
    span = high - low
    if span == 0:
        return 1.0
    return float(value - low) / float(span) + 1


def calculate_tweet_importance(verbose, start, end, proc, TIV):
    """Compute importance scores for the tweets in ``TWEETS`` items ``[start:end]``.

    For every tweet in the slice five terms are combined:

        1. favorite count, scaled by CONST_MIN_FAV / CONST_MAX_FAV
        2. retweet count, scaled by CONST_MIN_RT / CONST_MAX_RT
        3. author's list count, scaled by CONST_MIN_LIST / CONST_MAX_LIST
        4. author's follower count, scaled by CONST_MIN_FOLLOW / CONST_MAX_FOLLOW
        5. author's user importance value from ``UIV`` (0 when unknown)

    and ``importance = sum(terms) / NUM_OF_TERMS * TERM_WEIGHT_CONSTANT``.

    Args:
        verbose: accepted for interface compatibility; not used.
        start: index of the first ``TWEETS`` item to process.
        end: index one past the last ``TWEETS`` item to process.
        proc: worker number, used only in the progress message.
        TIV: mapping that receives
            ``tweet_id -> {'doc_id': tweet_id, 'importance': score}``.

    Returns:
        The number of tweets processed.

    Raises:
        KeyError: if a tweet's author has no entry in ``USER``.
    """
    total = 0

    # list() keeps the slice valid even where items() returns a view.
    for tweet_id, meta in list(TWEETS.items())[start:end]:
        user_screen_name = meta.get('user_screen_name')
        user_stats = USER[user_screen_name]

        fav_term = _scaled_term(
            meta.get('fav_count'), CONST_MIN_FAV, CONST_MAX_FAV)
        retweet_term = _scaled_term(
            meta.get('retweet_count'), CONST_MIN_RT, CONST_MAX_RT)
        list_term = _scaled_term(
            user_stats.get('list_count'), CONST_MIN_LIST, CONST_MAX_LIST)
        follower_term = _scaled_term(
            user_stats.get('follower_count'),
            CONST_MIN_FOLLOW, CONST_MAX_FOLLOW)

        # Authors without a computed UIV contribute nothing for this term.
        uiv_term = UIV.get(user_screen_name)
        if uiv_term is None:
            uiv_term = 0

        term_total = (fav_term + retweet_term + list_term
                      + follower_term + uiv_term)
        importance = float(term_total) / NUM_OF_TERMS * TERM_WEIGHT_CONSTANT
        TIV[tweet_id] = {'doc_id': tweet_id, 'importance': importance}
        total += 1

    print('proc_%d processed %d tweets' % (proc, total))
    return total

def update_EDGE(u1, u2, verbose=False):
    """Store the mention weights between ``u1`` and ``u2`` in ``EDGE``, both ways.

    ``EDGE[u1][u2]`` receives the number of times u2 mentioned u1 divided by
    u2's total mention count; ``EDGE[u2][u1]`` receives the mirror value.  A
    weight stays 0.0 when the corresponding total is zero.

    Args:
        u1: first user name.
        u2: second user name.
        verbose: when true, print the intermediate counts and both weights.
    """
    u1_by_u2 = mention.get_mentioned_by_stat(u2, u1)
    u2_total = mention.get_total_mention(u2)
    u2_by_u1 = mention.get_mentioned_by_stat(u1, u2)
    u1_total = mention.get_total_mention(u1)

    weight1 = float(u1_by_u2) / float(u2_total) if u2_total != 0 else 0.0
    weight2 = float(u2_by_u1) / float(u1_total) if u1_total != 0 else 0.0

    # Both lookups happen before EDGE is modified.
    u1_missing = EDGE.get(u1) is None
    u2_missing = EDGE.get(u2) is None

    if u1_missing:
        EDGE[u1] = dict()
    EDGE[u1][u2] = weight1

    if u2_missing:
        EDGE[u2] = dict()
    EDGE[u2][u1] = weight2

    if not verbose:
        return

    report = [
        '\n=========================',
        'mention_of_u1_by_u2: %d' % u1_by_u2,
        'mention_of_u2_by_u1: %d' % u2_by_u1,
        'total mention u1 %d' % u1_total,
        'total mention u2 %d' % u2_total,
        'weight1 : %f' % weight1,
        'weight2 : %f' % weight2,
        '=========================',
    ]
    print('\n'.join(report))


def calculate_UIV():
    """Run ``UIV_helper`` for every user that mentions or is mentioned.

    The user set is the union of the keys of ``mention.mention_user_dict``
    and ``mention.mentioned_by_user_dict``; progress is reported through
    ``printProgress`` before each user is processed.
    """
    mentioning_users = set(mention.mention_user_dict.keys())
    mentioned_users = set(mention.mentioned_by_user_dict.keys())
    everyone = mentioning_users | mentioned_users
    user_total = len(everyone)

    for position, user in enumerate(everyone, 1):
        printProgress(position, user_total, 'calculating UIV')
        UIV_helper(user)
        
def UIV_helper(u_name, verbose=False):
    """Compute the user importance value of ``u_name`` and store it in ``UIV``.

    The value is the sum of the weights in ``EDGE[u_name]`` divided by the
    number reported by ``mention.get_total_mentions_by_users(u_name)``.  It is
    0.0 when that number is zero or when ``EDGE`` has no entry for the user.

    Args:
        u_name: user name whose importance is computed.
        verbose: when true, print the value and the fraction it came from.
    """
    inlink_edges = mention.get_total_mentions_by_users(u_name)

    # A user can have inlinks without an EDGE entry; treat that as "no
    # weights" so the verbose report below never dereferences None.
    edge_weights = {}
    new_UIV = 0.0
    if inlink_edges != 0:
        edge_weights = EDGE.get(u_name) or {}
        new_UIV = float(sum(edge_weights.values())) / float(inlink_edges)

    UIV[u_name] = new_UIV

    if verbose:
        print('\n=========================')
        print('UIV(%s) is: %r' % (u_name, new_UIV))
        if inlink_edges != 0:
            # Show the fraction once: summed weights over the inlink count.
            print(''.join('%f+ ' % weight for weight in edge_weights.values()))
            print('--------------------')
            print('      %d' % inlink_edges)
        print('inlink_edge of %s is %d' % (u_name, inlink_edges))
        print('=========================')

    

##############  global functions #########

def print_tweet_mention():
    """Print how many user mentions refer to a user present in the collection.

    Walks every tweet in ``tweet_array``, counts each mentioned handle, and
    counts separately those handles found in ``username_userstruct``.  The
    result is printed as ``<in collection>/<total> users in collection``.
    """
    total = 0
    count = 0
    for tweet in tweet_array:
        if tweet.get_user_mentioned_count() == 0:
            continue
        for user_handle in tweet.get_user_mentioned_list():
            total += 1
            # Test membership on the dict itself: a hash lookup, not a scan
            # of a freshly built key list.
            if user_handle in username_userstruct:
                count += 1
    print('%d/%d users in collection' % (count, total))

def print_tweet_stat():
    """Print the retweet count of every tweet, then how many have retweets.

    One line is printed for each tweet in ``tweet_array`` (including those
    with zero retweets), followed by a ``<with RT>/<total> has RT`` summary.
    """
    total = len(tweet_array)
    retweeted = 0

    for tweet in tweet_array:
        rt_count = tweet.get_rt_count()
        retweeted += 1 if rt_count != 0 else 0
        print('tweet_id_%s has %d RT' % (tweet.get_tweet_id(), rt_count))

    print('%d/%d has RT' % (retweeted, total))