# This is a sample Python script.

# Press ⌃R to execute it or replace it with your code.
# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.

import numpy as np

# grid size
rows = 5
cols = 5

startPos = (0, 0)
dirtPos = (4, 4)
dirtPos2 = (2, 1)
dirtPos3 = (1, 3)

# number of episodes used to train the model
n_episodes = 5000
# maximum number of iterations per episode
max_iter_episode = 100
# probability of taking the best known action (greedy) instead of a random one
exploit_prob = 0.5
# discount factor for future rewards
discount_factor = 0.8


class VacuumWorld:
    def __init__(self, startPos, dirtPos, exploit_prob, discount_factor):
        self.rows = rows
        self.columns = cols
        self.start = startPos
        self.dirt = dirtPos
        self.n_episodes = n_episodes
        self.exploit_prob = exploit_prob
        self.discount_factor = discount_factor
        self.testPhase = False

    # define the grid, the dirt position, the Q-lookup table and the movement actions
    def initialize(self):
        self.grid = np.full((self.rows, self.columns), -1)
        self.grid[self.dirt] = 100
        self.q_lookup = np.zeros((self.rows, self.columns, 4))
        self.actions = ['up', 'right', 'down', 'left']

    # check whether the agent has found a dirty square; cleaning only
    # happens during the test phase, so training keeps the reward in place
    def analyze(self, row, col):
        if self.grid[row, col] == -1:
            return False
        if self.testPhase:
            print('Located dirt! Cleaning....')
            # clean action
            self.grid[row, col] = -1
        return True

    # epsilon-greedy action selection: exploit the best known action with
    # probability exploit_prob, otherwise explore with a random action
    def make_choice(self, row, col):
        if np.random.random() < self.exploit_prob:
            return np.argmax(self.q_lookup[row, col])
        return np.random.randint(4)

    # get the next location based on the chosen action; moves that would
    # leave the grid keep the agent in place
    def make_move(self, row, col, choice):
        if self.actions[choice] == 'up' and row > 0:
            return row - 1, col
        elif self.actions[choice] == 'down' and row < self.rows - 1:
            return row + 1, col
        elif self.actions[choice] == 'left' and col > 0:
            return row, col - 1
        elif self.actions[choice] == 'right' and col < self.columns - 1:
            return row, col + 1
        return row, col

    # train the model to steer toward the square with the highest reward
    def train(self):
        for episode in range(self.n_episodes):
            row, column = self.start
            # cap each episode so a wandering agent cannot loop forever
            for _ in range(max_iter_episode):
                if self.analyze(row, column):
                    break
                choice = self.make_choice(row, column)
                prev_row, prev_column = row, column
                row, column = self.make_move(row, column, choice)
                reward = self.grid[row, column]
                # Q-learning update (no learning rate, so the new estimate
                # overwrites the old one)
                new_q_value = reward + (self.discount_factor * np.max(self.q_lookup[row, column]))
                self.q_lookup[prev_row, prev_column, choice] = new_q_value

    # follow the learned policy from `start` until the dirt is found,
    # returning the path the agent took
    def clean_grid(self, start):
        row, col = start
        if self.analyze(row, col):
            return []
        agent_path = [[row, col]]
        while not self.analyze(row, col):
            choice = self.make_choice(row, col)
            row, col = self.make_move(row, col, choice)
            agent_path.append([row, col])
        return agent_path


def search():
    # train a fresh model for each piece of dirt; after cleaning, the next
    # run starts from the square that was just cleaned
    for dirt, test_start in [(dirtPos, startPos),
                             (dirtPos2, dirtPos),
                             (dirtPos3, dirtPos2)]:
        model = VacuumWorld(startPos, dirt, exploit_prob, discount_factor)
        model.initialize()
        model.train()
        model.testPhase = True
        print(model.clean_grid(test_start))


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    search()

# See PyCharm help at https://www.jetbrains.com/help/pycharm/
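
# ----------------------------------------------------------------------
# Illustration (not part of the original script): a minimal sketch of how
# the learned greedy policy could be inspected after training.
# `render_policy` is a hypothetical helper added for this example; it
# assumes a trained VacuumWorld instance and prints, for every cell, the
# action with the highest Q-value as an arrow.
def render_policy(model):
    arrows = {'up': '^', 'right': '>', 'down': 'v', 'left': '<'}
    for row in range(model.rows):
        cells = []
        for col in range(model.columns):
            # pick the greedy action for this cell from the Q-table
            best = model.actions[np.argmax(model.q_lookup[row, col])]
            cells.append(arrows[best])
        print(' '.join(cells))

# Example usage (assumes `model` was created and trained as in search()):
#     render_policy(model)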