Skip to content

optimize

OptimizeDefault

optimize(self, ga, **kwargs)

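Runs the genetic algorithm: creates the initial pool and population, then evolves the population and returns the best lineup found.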

Parameters:

Name Type Description Default
ga GeneticAlgorithm

the ga instance

required
**kwargs

keyword arguments for plugins

{}

Returns:

Type Description
Dict[str, Any]

Dict with keys 'population' (np.ndarray), 'fitness' (np.ndarray), 'best_lineup' (pd.DataFrame), and 'best_score' (float)

Source code in pangadfs/optimize.py
def optimize(self, ga: GeneticAlgorithm, **kwargs) -> Dict[str, Any]:
    """Runs the genetic algorithm and returns the best lineup found.

    Builds the player pool and an initial population, then repeatedly
    selects, crosses over, mutates, and validates the population. The loop
    ends after ``n_generations`` iterations or once ``stop_criteria``
    consecutive generations fail to beat the best score, whichever is first.

    Args:
        ga (GeneticAlgorithm): the ga instance
        **kwargs: keyword arguments for plugins
            (not read by this implementation)

    Returns:
        Dict
        'population': np.ndarray, the population of the last generation
        'fitness': np.ndarray, the fitness of that population
        'best_lineup': pd.DataFrame, the pool rows of the best lineup
        'best_score': float, the fitness of the best lineup

    """
    ga_settings = ga.ctx['ga_settings']

    # create pool and pospool
    # pospool used to generate initial population
    # is a dict of position_name: DataFrame
    pop_size = ga_settings['population_size']
    pool = ga.pool(csvpth=ga_settings['csvpth'])
    cmap = {'points': ga_settings['points_column'],
            'position': ga_settings['position_column'],
            'salary': ga_settings['salary_column']}
    site_settings = ga.ctx['site_settings']
    pospool = ga.pospool(
        pool=pool,
        posfilter=site_settings['posfilter'],
        column_mapping=cmap,
        flex_positions=site_settings['flex_positions']
    )

    # create salary and points arrays
    # these match indices of pool
    points = pool[cmap['points']].values
    salaries = pool[cmap['salary']].values

    # create initial population
    initial_population = ga.populate(
        pospool=pospool,
        posmap=site_settings['posmap'],
        population_size=pop_size
    )

    # apply validators
    # default is to validate duplicates and salary
    # can add other validators as desired
    salary_cap = site_settings['salary_cap']
    initial_population = ga.validate(
        population=initial_population,
        salaries=salaries,
        salary_cap=salary_cap
    )

    # need fitness to determine best lineup
    # and also for selection when loop starts
    population_fitness = ga.fitness(
        population=initial_population,
        points=points
    )

    # set overall_max based on initial population
    # copy the row so the saved lineup never aliases a population array
    # that later generations may work on
    omidx = population_fitness.argmax()
    best_fitness = population_fitness[omidx]
    best_lineup = initial_population[omidx].copy()

    # optional settings do not change between generations, so read them once
    verbose = ga_settings.get('verbose')
    elite_divisor = ga_settings.get('elite_divisor', 5)
    elite_method = ga_settings.get('elite_method', 'fittest')
    select_method = ga_settings.get('select_method', 'roulette')
    crossover_method = ga_settings.get('crossover_method', 'uniform')

    # create new generations
    n_unimproved = 0
    population = initial_population.copy()

    for i in range(1, ga_settings['n_generations'] + 1):

        # end program after n generations if not improving
        if n_unimproved == ga_settings['stop_criteria']:
            break

        # display progress information with verbose parameter
        if verbose:
            logging.info(f'Starting generation {i}')
            logging.info(f'Best lineup score {best_fitness}')

        # select the population
        # here, we are holding back the fittest 20% (by default) to ensure
        # that crossover and mutation do not overwrite good individuals
        elite = ga.select(
            population=population,
            population_fitness=population_fitness,
            n=len(population) // elite_divisor,
            method=elite_method
        )

        selected = ga.select(
            population=population,
            population_fitness=population_fitness,
            n=len(population),
            method=select_method
        )

        # cross over the population
        # here, we use uniform crossover (by default), which splits the
        # population and randomly exchanges 0 - all chromosomes
        crossed_over = ga.crossover(population=selected, method=crossover_method)

        # mutate the crossed over population (leave elite alone)
        # a fixed 'mutation_rate' setting is used as given; otherwise the
        # rate grows with the number of unimproved generations.
        # The adaptive rate is capped at 1.0 because it is a probability and
        # would otherwise exceed 1 after 50 unimproved generations.
        adaptive_rate = min(1.0, max(.05, n_unimproved / 50))
        mutation_rate = ga_settings.get('mutation_rate', adaptive_rate)
        mutated = ga.mutate(population=crossed_over, mutation_rate=mutation_rate)

        # validate the population (elite + mutated)
        # NOTE(review): elite is stacked on top of a full-size selection, so
        # the stacked array is presumably larger than the incoming population
        # before validation - confirm this growth is intended.
        population = ga.validate(
            population=np.vstack((elite, mutated)),
            salaries=salaries,
            salary_cap=salary_cap
        )

        # assess fitness and get the best score
        population_fitness = ga.fitness(population=population, points=points)
        omidx = population_fitness.argmax()
        generation_max = population_fitness[omidx]

        # if new best score, then set n_unimproved to 0
        # and save the new best score and lineup
        # otherwise increment n_unimproved
        if generation_max > best_fitness:
            logging.info(f'Lineup improved to {generation_max}')
            best_fitness = generation_max
            best_lineup = population[omidx].copy()
            n_unimproved = 0
        else:
            n_unimproved += 1
            logging.info(f'Lineup unimproved {n_unimproved} times')

    # FINALIZE RESULTS
    # will break after n_generations or when stop_criteria reached
    return {
        'population': population,
        'fitness': population_fitness,
        'best_lineup': pool.loc[best_lineup, :],
        'best_score': best_fitness
    }