# This is a sample Python script.

# Press ⌃R to execute it or replace it with your code.
# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.

import numpy as np

# grid size
rows = 5
cols = 5

startPos = (0, 0)
dirtPos = (4, 4)
dirtPos2 = (2, 1)
dirtPos3 = (1, 3)

# number of episodes used to train the model
n_episodes = 5000
# maximum number of iterations per episode
max_iter_episode = 100
# probability of taking the best known action (greedy) instead of a random one
exploit_prob = 0.5
# discount factor for future rewards
discount_factor = 0.8


class VacuumWorld:
    def __init__(self, startPos, dirtPos, exploit_prob, discount_factor):
        self.rows = rows
        self.columns = cols
        self.start = startPos
        self.dirt = dirtPos
        self.n_episodes = n_episodes
        self.exploit_prob = exploit_prob
        self.discount_factor = discount_factor
        self.testPhase = False

    # define the grid, the dirt position, the Q-lookup table and the movement actions
    def initialize(self):
        self.grid = np.full((self.rows, self.columns), -1)
        self.grid[self.dirt] = 100
        self.q_lookup = np.zeros((self.rows, self.columns, 4))
        self.actions = ['up', 'right', 'down', 'left']

    # check whether the agent has found a dirty square; cleaning only
    # happens during the test phase, so training keeps the reward in place
    def analyze(self, row, col):
        if self.grid[row, col] == -1:
            return False
        if self.testPhase:
            print('Located dirt! Cleaning....')
            # clean action
            self.grid[row, col] = -1
        return True

    # epsilon-greedy action selection: exploit the best known action with
    # probability exploit_prob, otherwise explore with a random action
    def make_choice(self, row, col):
        if np.random.random() < self.exploit_prob:
            return np.argmax(self.q_lookup[row, col])
        return np.random.randint(4)

    # get the next location based on the chosen action; moves that would
    # leave the grid keep the agent in place
    def make_move(self, row, col, choice):
        if self.actions[choice] == 'up' and row > 0:
            return row - 1, col
        elif self.actions[choice] == 'down' and row < self.rows - 1:
            return row + 1, col
        elif self.actions[choice] == 'left' and col > 0:
            return row, col - 1
        elif self.actions[choice] == 'right' and col < self.columns - 1:
            return row, col + 1
        return row, col

    # train the model to steer toward the square with the highest reward
    def train(self):
        for episode in range(self.n_episodes):
            row, column = self.start
            # cap each episode so a wandering agent cannot loop forever
            for _ in range(max_iter_episode):
                if self.analyze(row, column):
                    break
                choice = self.make_choice(row, column)
                prev_row, prev_column = row, column
                row, column = self.make_move(row, column, choice)
                reward = self.grid[row, column]
                # Q-learning update (no learning rate, so the new estimate
                # overwrites the old one)
                new_q_value = reward + (self.discount_factor * np.max(self.q_lookup[row, column]))
                self.q_lookup[prev_row, prev_column, choice] = new_q_value

    # follow the learned policy from `start` until the dirt is found,
    # returning the path the agent took
    def clean_grid(self, start):
        row, col = start
        if self.analyze(row, col):
            return []
        agent_path = [[row, col]]
        while not self.analyze(row, col):
            choice = self.make_choice(row, col)
            row, col = self.make_move(row, col, choice)
            agent_path.append([row, col])
        return agent_path


def search():
    # train a fresh model for each piece of dirt; after cleaning, the next
    # run starts from the square that was just cleaned
    for dirt, test_start in [(dirtPos, startPos),
                             (dirtPos2, dirtPos),
                             (dirtPos3, dirtPos2)]:
        model = VacuumWorld(startPos, dirt, exploit_prob, discount_factor)
        model.initialize()
        model.train()
        model.testPhase = True
        print(model.clean_grid(test_start))


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    search()

# See PyCharm help at https://www.jetbrains.com/help/pycharm/
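
# ----------------------------------------------------------------------
# Illustration (not part of the original script): a minimal sketch of how
# the learned greedy policy could be inspected after training.
# `render_policy` is a hypothetical helper added for this example; it
# assumes a trained VacuumWorld instance and prints, for every cell, the
# action with the highest Q-value as an arrow.
def render_policy(model):
    arrows = {'up': '^', 'right': '>', 'down': 'v', 'left': '<'}
    for row in range(model.rows):
        cells = []
        for col in range(model.columns):
            # pick the greedy action for this cell from the Q-table
            best = model.actions[np.argmax(model.q_lookup[row, col])]
            cells.append(arrows[best])
        print(' '.join(cells))

# Example usage (assumes `model` was created and trained as in search()):
#     render_policy(model)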