#
# Work in progress. Only SimpleGradientDescent is currently available.
#
import os, json, fnmatch
import param
from lancet.core import Arguments, Concatenate, CartesianProduct
class DynamicArgs(Arguments):
    """
    DynamicArgs are declarative specifications that define a parameter
    space via a dynamic algorithmic process instead of using
    precomputed arguments. Unlike the static Args objects, new
    arguments can only be generated in response to feedback from the
    processes that are executed. This type of dynamic feedback is a
    common feature of algorithms such as hill climbing optimization,
    genetic algorithms, bisection search and other sophisticated
    optimization and search procedures.

    Like the Args objects, a DynamicArgs object is an iterator. On
    each iteration, one or more argument sets defining a collection of
    independent jobs are returned (it should be possible to execute
    these jobs concurrently). Between iterations, the output_extractor
    function is used to extract the necessary information from the
    standard output streams of the previously executed jobs. This
    information is used to update the internal state of the
    DynamicArgs object, which can then generate more arguments to
    explore or terminate. All DynamicArgs classes need to declare the
    expected return format of the output_extractor function.
    """
    output_extractor = param.Callable(default=json.loads, doc="""
        The function that returns the relevant data from the standard
        output stream dumped to file in the streams subdirectory. This
        information must be returned in a format suitable for updating
        the specifier. By default json.loads is used, but pickle.loads
        could also be a valid option. The callable must take a string
        and return a Python object suitable for updating the specifier.""")
def __init__(self, **params):
super(DynamicArgs, self).__init__(**params)
        # Returned on the next iteration and updated by the _update_state method
self._next_val = self._initial_state(**params)
# Trace of inputs from output_extractor and returned arguments
self.trace = [(None, self._next_val)]
def _update_state(self, data):
"""
Determines the next desired point in the parameter space using
the parsed data returned from the public update
method. Returns the value that will next emitted on the next
iteration using the list of dictionaries format for
arguments. If the update fails or data is None, StopIteration
should be supplied as the return value.
"""
raise NotImplementedError
def _initial_state(self, **kwargs):
"""
Reset the the DynamicArgs object to its initial state and used
to reset the object adter the iterator is exhausted. The
return value is the initial argument to be returned by
next().
"""
raise NotImplementedError
def __next__(self):
if self._next_val is StopIteration:
            self._next_val = self._initial_state()
raise StopIteration
current_val = self._next_val
self._next_val = StopIteration
return current_val
next = __next__
    def update(self, tids, info):
        """
        Called to update the state of the iterator. This method
        receives the set of task ids from the previous set of tasks
        together with the launch information to allow the output
        values to be parsed using the output_extractor. This data is
        then used to determine the next desired point in the parameter
        space by calling the _update_state method.
        """
outputs_dir = os.path.join(info['root_directory'], 'streams')
pattern = '%s_*_tid_*{tid}.o.{tid}*' % info['batch_name']
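        # For example, with batch_name 'opt' and tid 3 the pattern expands to
        # 'opt_*_tid_*3.o.3*'; the exact file name matched depends on how the
        # launcher names the stream files (this example is illustrative only).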
flist = os.listdir(outputs_dir)
try:
outputs = []
for tid in tids:
matches = fnmatch.filter(flist, pattern.format(tid=tid))
if len(matches) != 1:
self.warning("No unique output file for tid %d" % tid)
                with open(os.path.join(outputs_dir, matches[0]), 'r') as f:
                    contents = f.read()
                outputs.append(self.output_extractor(contents))
self._next_val = self._update_state(outputs)
self.trace.append((outputs, self._next_val))
        except Exception:
self.warning("Cannot load required output files. Cannot continue.")
self._next_val = StopIteration
    def show(self):
        """
        Print the argument values that are currently available. Since
        the specifier is dynamic, not all argument values may be
        available yet.
        """
copied = self.copy()
enumerated = [el for el in enumerate(copied)]
for (group_ind, specs) in enumerated:
if len(enumerated) > 1: print("Group %d" % group_ind)
ordering = self.constant_keys + self.varying_keys
# Ordered nicely by varying_keys definition.
spec_lines = [', '.join(['%s=%s' % (k, s[k]) for k in ordering]) for s in specs]
print('\n'.join(['%d: %s' % (i,l) for (i,l) in enumerate(spec_lines)]))
print('Remaining arguments not available for %s' % self.__class__.__name__)
def _trace_summary(self):
"""
Summarizes the trace of values used to update the DynamicArgs
and the arguments subsequently returned. May be used to
implement the summary method.
"""
for (i, (val, args)) in enumerate(self.trace):
if args is StopIteration:
info = "Terminated"
else:
pprint = ','.join('{' + ','.join('%s=%r' % (k,v)
for (k,v) in arg.items()) + '}' for arg in args)
info = ("exploring arguments [%s]" % pprint )
if i == 0: print("Step %d: Initially %s." % (i, info))
else: print("Step %d: %s after receiving input(s) %s." % (i, info.capitalize(), val))
def __add__(self, other):
"""
Concatenates two argument specifiers. See Concatenate and
DynamicConcatenate documentation respectively.
"""
if not other: return self
dynamic = (isinstance(self, DynamicArgs), isinstance(other, DynamicArgs))
if dynamic == (True, True):
raise Exception('Cannot concatenate two dynamic specifiers.')
elif (True in dynamic):
return DynamicConcatenate(self,other)
else:
return Concatenate(self,other)
def __mul__(self, other):
"""
Takes the cartesian product of two argument specifiers. See
CartesianProduct and DynamicCartesianProduct documentation.
"""
if not other: return []
dynamic = (isinstance(self, DynamicArgs), isinstance(other, DynamicArgs))
if dynamic == (True, True):
            raise Exception('Cannot take the Cartesian product of two dynamic specifiers.')
elif (True in dynamic):
return DynamicCartesianProduct(self, other)
else:
return CartesianProduct(self, other)
def __len__(self):
"""
Many DynamicArgs won't have a length that can be
precomputed. Most DynamicArgs objects will have an iteration
limit to guarantee eventual termination. If so, the maximum
possible number of arguments that could be generated should be
returned.
"""
raise NotImplementedError
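# Minimal sketch of the feedback loop that a launcher is expected to drive
# around a DynamicArgs object. The launch_jobs callable is hypothetical (in
# practice a Launcher runs the jobs and writes their standard output into the
# 'streams' subdirectory); it is shown only to make the iterate/update
# protocol concrete.
def _example_feedback_loop(dynamic_args, launch_jobs, info):
    """
    Illustrative only: iterate over a DynamicArgs object, launch the
    returned argument sets concurrently (launch_jobs is assumed to return
    the corresponding task ids) and feed the task ids back via update()
    so the next argument sets can be computed from the job outputs. The
    info dictionary must supply the 'root_directory' and 'batch_name'
    entries used by update().
    """
    for specs in dynamic_args:
        tids = launch_jobs(specs)        # hypothetical: one job per argument set
        dynamic_args.update(tids, info)  # parse outputs, compute next specs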
class SimpleGradientDescent(DynamicArgs):
    """
    Very simple gradient descent optimizer, deliberately kept simple in
    order to illustrate clearly how DynamicArgs may be implemented. A
    more practical implementation would probably make use of mature,
    third-party optimization libraries (such as the routines offered in
    scipy.optimize).

    This particular algorithm minimizes an output value via greedy
    gradient descent. The local parameter space is explored by
    examining the change in output value when an increment or decrement
    of 'stepsize' is made in the parameter space, centered around the
    current position. The parameter is initialized with the 'start'
    value and the optimization process terminates when either a local
    minimum has been found or 'max_steps' is reached.

    The 'output_extractor' function is expected to return a single
    scalar number to drive the gradient descent algorithm forwards.
    """
key = param.String(constant=True, doc="""
The name of the argument that will be optimized in a greedy fashion.""")
    start = param.Number(default=0.0, constant=True, doc="""
        The starting argument value for the gradient descent.""")
stepsize = param.Number(default=1.0, constant=True, doc="""
The size of the steps taken in parameter space.""")
    max_steps = param.Integer(default=100, constant=True, doc="""
Once max_steps is reached, the optimization terminates.""")
def __init__(self, key, **params):
super(SimpleGradientDescent, self).__init__(key=key, **params)
self.pprint_args(['key', 'start', 'stepsize'],[])
self._termination_info = None
    def _initial_state(self, **kwargs):
        self._steps_complete = 0
        self._best_val = float('inf')
        self._arg = self.start
        return [{self.key: self.start + self.stepsize},
                {self.key: self.start - self.stepsize}]
def _update_state(self, vals):
"""
Takes as input a list or tuple of two elements. First the
value returned by incrementing by 'stepsize' followed by the
value returned after a 'stepsize' decrement.
"""
self._steps_complete += 1
if self._steps_complete == self.max_steps:
self._termination_info = (False, self._best_val, self._arg)
return StopIteration
arg_inc, arg_dec = vals
best_val = min(arg_inc, arg_dec, self._best_val)
if best_val == self._best_val:
self._termination_info = (True, best_val, self._arg)
return StopIteration
self._arg += self.stepsize if (arg_dec > arg_inc) else -self.stepsize
        self._best_val = best_val
return [{self.key:self._arg+self.stepsize},
{self.key:self._arg-self.stepsize}]
@property
def constant_keys(self): return []
@property
def constant_items(self): return []
@property
def varying_keys(self): return [self.key]
def summary(self):
print('Varying Keys: %r' % self.key)
print('Maximum steps allowed: %d' % self.max_steps)
self._trace_summary()
(val, arg) = (self.trace[-1])
if self._termination_info:
(success, best_val, arg) = self._termination_info
condition = 'Successfully converged.' if success else 'Maximum step limit reached.'
print("%s Minimum value of %r at %s=%r." % (condition, best_val, self.key, arg))
def __len__(self):
return 2*self.max_steps # Each step specifies 2 concurrent jobs
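# The following sketch (not part of the public API) shows how
# SimpleGradientDescent walks towards a minimum. It drives the specifier by
# hand against a toy objective, feeding the objective values straight into
# _update_state instead of launching jobs and parsing their output streams;
# the function name and the objective are illustrative assumptions only.
def _example_simple_gradient_descent():
    """
    Illustrative only: minimize f(x) = (x - 3)**2 starting from x=0 with
    unit steps. Each iteration yields two argument sets (x+stepsize and
    x-stepsize); their objective values are fed back to compute the next
    probe points, much as DynamicArgs.update() does with real jobs.
    """
    descent = SimpleGradientDescent('x', start=0.0, stepsize=1.0, max_steps=20)
    objective = lambda x: (x - 3.0)**2
    for specs in descent:
        values = [objective(spec['x']) for spec in specs]
        descent._next_val = descent._update_state(values)
        descent.trace.append((values, descent._next_val))
    descent.summary()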
#=========================#
# Experimental code (WIP) #
#=========================#
class DynamicConcatenate(DynamicArgs):
    def __init__(self, first, second):
        self.first = first
        self.second = second
        super(DynamicConcatenate, self).__init__(dynamic=True)
self._exhausted = False
self._first_sent = False
self._first_cached = None
self._second_cached = None
if not isinstance(first,DynamicArgs):
self._first_cached = next(first.copy())
if not isinstance(second,DynamicArgs):
self._second_cached = next(second.copy())
self.pprint_args(['first', 'second'],[], infix_operator='+')
    @property
    def constant_keys(self):
        return list(set(self.first.constant_keys) | set(self.second.constant_keys))
    @property
    def varying_keys(self):
        return list(set(self.first.varying_keys) | set(self.second.varying_keys))
    def update(self, tids, info):
        if isinstance(self.first, DynamicArgs) and not self._exhausted:
            self.first.update(tids, info)
        elif isinstance(self.second, DynamicArgs) and self._first_sent:
            self.second.update(tids, info)
def __next__(self):
if self._first_cached is None:
try: return next(self.first)
except StopIteration:
self._exhausted = True
return self._second_cached
else:
if not self._first_sent:
self._first_sent = True
return self._first_cached
else:
return next(self.second)
next = __next__
class DynamicCartesianProduct(DynamicArgs):
    def __init__(self, first, second):
        self.first = first
        self.second = second
        overlap = set(self.first.varying_keys) & set(self.second.varying_keys)
        assert overlap == set(), 'Sets of keys cannot overlap between argument specifiers in cartesian product.'
        super(DynamicCartesianProduct, self).__init__(dynamic=True)
self._first_cached = None
self._second_cached = None
if not isinstance(first,DynamicArgs):
self._first_cached = next(first.copy())
if not isinstance(second,DynamicArgs):
self._second_cached = next(second.copy())
self.pprint_args(['first', 'second'],[], infix_operator='*')
    @property
    def constant_keys(self):
        return list(set(self.first.constant_keys) | set(self.second.constant_keys))
    @property
    def varying_keys(self):
        return list(set(self.first.varying_keys) | set(self.second.varying_keys))
    def update(self, tids, info):
        if isinstance(self.first, DynamicArgs):  self.first.update(tids, info)
        if isinstance(self.second, DynamicArgs): self.second.update(tids, info)
def __next__(self):
if self._first_cached is None:
first_spec = next(self.first)
return self._cartesian_product(first_spec, self._second_cached)
else:
second_spec = next(self.second)
return self._cartesian_product(self._first_cached, second_spec)
next = __next__