# univariate regression with nonlinear model and general M-estimator # Copyright (C) Wesley Phoa, 2000 from mathutil import * from operator import * from string import * def Lorentzian(z): """\ -log density function for Cauchy distributed errors """ return log(1 + z*z/2) def M_estimate(data, fn, logdensity=Lorentzian): """\ M_estimate(data, fn, logdensity): M-estimator of univariate model data is a list of tuples (x, y) fn is a function f of one variable logdensity = negative logarithm of assumed pdf of model errors returns sum logdensity(y - f(x)) """ return reduce(add, map(lambda (x, y), f=fn, logdensity=logdensity: logdensity(y - f(x)), data)) def R_squared(data, fn, logdensity=Lorentzian): """\ R_squared(data, fn): generalized R_squared of univariate model data is a list of tuples (x, y) fn is a function f of one variable logdensity = negative logarithm of assumed pdf of model errors returns 1 - (sum logdensity(y - f(x))) / (sum logdensity(y - y_avg)) """ n = len(data) y_avg = reduce(add, map(lambda (x, y): y, data)) / n M_est = M_estimate(data, fn, logdensity) return 1 - M_est / \ reduce(add, map(lambda (x, y), y_avg=y_avg, logdensity=logdensity: logdensity(y - y_avg), data)) def interpret(model): """\ convert a model specification like 'a0 + a1*x + a2*x*x' to a nested lambda expression, e.g. lambda a0, a1, a2: lambda x, a0=a0, a1=a1, a2=a2: a0 + a1*x + a2*x*x and also corresponding lambda expression for objective function """ varlist = [] expr = "'%s'"% (model, ) # fails for more than 10 variables n = 0 while find(model, 'a' + str(n)) > -1: varlist.append('a' + str(n)) expr = string.replace(expr, 'a' + str(n), "' + str(%s)+ '" % ('a' + str(n), )) n = n + 1 fn = eval("lambda %s: lambda x, %s: %s" % (join(varlist, ', '), join(map(lambda s: '%s=%s' % (s, s), varlist), ', '), model)) objfn = eval("lambda %s, data=gdata: M_estimate(data, lambda x, %s: %s)" % (join(varlist, ', '), join(map(lambda s: '%s=%s' % (s, s), varlist), ', '), model)) strfn = eval("lambda %s: %s" % (join(varlist, ', '), expr)) return fn, objfn, strfn, n def regress(model, data, init=None, objective=M_estimate): """\ fit, function, formula, R2 = regress(model, init): univariate regression model is a string expression specifying the model e.g. 'a0 + a1*x + a2*x*x' data is a list of tuples (x, y) init is a starting guess for the coefficients, e.g. (0., 1., 2.5) fit is the regression results (a0, a1,...) function is the model given as a lambda-expression formula is the model given as a string expression e.g. '5.32 + 8.24*x + 8.81*x*x' R2 is generalized R-squared """ global gdata gdata = data # hack so lambda-expression can see data fn, objfn, strfn, n = interpret(model) if not init: init = n * [0.] # starting guess fit = powell_min(objfn, init) return fit, apply(fn, fit), apply(strfn, fit), \ R_squared(data, apply(fn, fit)) if __name__=='__main__': data = [(1,2), (2,3.5), (3,6)] fn = lambda x: 2*x print M_estimate(data, fn), R_squared(data, fn) model = 'a0 + a1*x + a2*x*x' fit, function, formula, R2 = regress(model, data) print formula, R2 model = 'a0 + a1*x' fit, function, formula, R2 = regress(model, data) print formula, R2