#Created By: Sarika Padmashali
import json
import csv
from collections import defaultdict
import sys
import pandas
import random
import numpy 
#Taking inputs - user ID and restaurant ID from command line
userID = unicode(sys.argv[1])
restaurantID = unicode(sys.argv[2])
print "User ID entered: ", userID
print "Restaurant ID entered: ", restaurantID
data = []
user_rating = {}
#Count of unique users
unique_users = []
#Count of unique restaurants      
unique_restaurants = []      
count = 0
#Finding unique users and restaurants in the matrix
with open('reviews.json') as f:
    for line in f:
        count = count+1
        data = json.loads(line)
        if not data["user_id"] in unique_users:
            unique_users.append(data["user_id"])       
        if not data["business_id"] in unique_restaurants:
            unique_restaurants.append(data["business_id"])       
restaurant_index = [x for x in range(len(unique_restaurants))] 
user_index = [x for x in range(len(unique_users))]    
restaurant_mapping = dict(zip(unique_restaurants,restaurant_index))
user_mapping = dict(zip(unique_users,user_index))                
N = len(user_mapping) 
M = len(restaurant_mapping)
#Creating a matrix R of ratings and initializing 0 
R =[]
for i in range(N):
    b = list()
    for j in range(M):
         b.insert(j,0)
    R.append(b)
#Loading reviews data and populating the rating matrix R    
with open('reviews.json') as f:
    for line in f:
        data = json.loads(line)
        restaurant = restaurant_mapping[data["business_id"]]
        user = user_mapping[data["user_id"]]
        R[user][restaurant] = data["stars"]                                      
#Finding K features. K = 2 in our case taken randomly
K = 2   
#P and Q are the factors we want to find
P = numpy.random.rand(N,K) 
Q = numpy.random.rand(M,K)
#Latent matrix factorization using gradient descent 
def latent_matrix_factorization(R, P, Q, K, steps=500, alpha=0.0002, beta=0.02):
    """Factorize R into P and Q by gradient descent over the observed ratings.

    Only entries of R that are greater than 0 are treated as observed
    ratings; every other entry is ignored.

    Parameters:
        R     -- 2-D ratings (list of rows or 2-D array), indexed R[i][j].
        P     -- 2-D numpy array, one row per row of R. Updated IN PLACE.
        Q     -- 2-D numpy array, one row per column of R. Updated IN PLACE
                 (the function works on the transposed view of it).
        K     -- number of leading feature columns of P and Q to update.
        steps -- maximum number of passes over the observed ratings.
        alpha -- learning rate.
        beta  -- regularization weight.

    Returns:
        (P, Q) with the same orientation as the arguments, so the
        approximation of R is numpy.dot(P, Q.T).

    Training stops early once the regularized squared error over the
    observed ratings drops below 0.001.
    """
    Q = Q.T
    #The positions of the observed ratings never change during training, so
    #collect them once instead of rescanning the whole matrix on every step
    observed = [(i, j, rating)
                for i, row in enumerate(R)
                for j, rating in enumerate(row)
                if rating > 0]
    #2.0 keeps this a true division under Python 2 even for an integer beta
    half_beta = beta / 2.0
    #range (not xrange) so the function also runs unchanged on Python 3
    for step in range(steps):
        for i, j, rating in observed:
            eij = rating - numpy.dot(P[i,:],Q[:,j])
            #Update order matters: the Q update deliberately uses the
            #already-updated P row, exactly as the per-feature loop did
            P[i,:K] = P[i,:K] + alpha * (2 * eij * Q[:K,j] - beta * P[i,:K])
            Q[:K,j] = Q[:K,j] + alpha * (2 * eij * P[i,:K] - beta * Q[:K,j])
        #Regularized squared error over the observed ratings
        e = 0
        for i, j, rating in observed:
            e = e + pow(rating - numpy.dot(P[i,:],Q[:,j]), 2)
            e = e + half_beta * (numpy.dot(P[i,:K],P[i,:K]) + numpy.dot(Q[:K,j],Q[:K,j]))
        #If error goes below 0.001 then stop
        if e < 0.001:
            break
    return P, Q.T
factorP,factorQ = latent_matrix_factorization(R, P, Q, K)
approximate_R = numpy.dot(factorP, factorQ.T)  
user_entered_restaurantID =  restaurant_mapping[unicode(restaurantID)]
user_enetered_userID = user_mapping[unicode(userID)]
#print user_entered_restaurantID
#print user_enetered_userID
print "The predited rating for the user id and restaurant id entered above is",approximate_R[user_enetered_userID][user_entered_restaurantID]