PDA

View Full Version : NonlinearRegression - trivial change increases solution time?



cebailey
06-13-2007, 12:30 PM
Using the NonlinearRegression class, why does saying
ydata = new double[myArraySizeVariable];
xdata = new double[myArraySizeVariable];
inside the definition of the method f() almost double the time to execute regression.Solve(fcn), from 0.4 s to 0.7 s, relative to doing this elsewhere?

Forgive me if the question or my code below looks really dumb. I am just learning C# and OOP and this is my first project. I found the second version fixes a problem in the first version but it probably needs lots of work. If you feel like taking pity on the beginner, great!



// THIS VERSION SOLVES IN 0.4 SECONDS, BUT IT'S UGLY 7 LINES DOWN
namespace Exploring
{
// Supplies the residual (f) and derivative callbacks that Main hands to
// NonlinearRegression.Solve(). Several names used below (allDataCount,
// block4Min, nobs, iend, someYDataArray, someXDataArray) are not declared
// anywhere in this listing; the bracketed English phrases are placeholders,
// not real C#.
public class NonlinearRegressionEx1 : Imsl.Stat.NonlinearRegression.IDerivative
{
// Number of points found to be in block4; incremented by ReadData().
public int block4Count = 0;
// THIS IS UGLY! MUST HARD CODE 1133 BECAUSE block4Count UNAVAILABLE HERE
// Both buffers are allocated exactly once, by these field initializers.
public double[] ydata = new double[1133];
public double[] xdata = new double[1133];
// Scans all allDataCount points, remembering the index where block4 starts
// (block4Min) and counting the points in it (block4Count), then stores that
// count in nobs.
public void ReadData()
{
for (int i = 0; i < allDataCount; i++)
{
if (this point is the start of block4) block4Min = i;
if (this point is in block4) block4Count = block4Count + 1;
}
nobs = block4Count;
} // end method ReadData()
// Residual callback. For iobs < nobs it sets wt[0] and frq[0] to 1.0, writes
// the residual for observation iobs into e[0] and returns true; otherwise it
// returns false.
public bool f(double[] theta, int iobs, double[] frq, double[] wt, double[] e)
{
// These two copies run on every call to f(), refilling the preallocated
// buffers with the block4 slice of the source arrays.
System.Array.Copy(someYDataArray, block4Min, ydata, 0, block4Count);
System.Array.Copy(someXDataArray, block4Min, xdata, 0, block4Count);
if (iobs < nobs)
{
wt[0] = 1.0;
frq[0] = 1.0;
iend = true;
e[0] = ydata[iobs] - (expression for my model);
}
else
{
iend = false;
}
return iend;
} // end method f()

// Derivative callback. Same control flow as f(): for iobs < nobs it sets
// wt[0] and frq[0] to 1.0, fills de[] with the derivative terms and returns
// true; otherwise it returns false. It does not touch xdata/ydata itself.
public bool derivative(double[] theta, int iobs, double[] frq, double[] wt, double[] de)
{
if (iobs < nobs)
{
wt[0] = 1.0;
frq[0] = 1.0;
iend = true;
de[0] = -(derivative of expression for my model);
de[1] = etc etc;
}
else
{
iend = false;
}
return iend;
} // end method derivative()
} // end class NonlinearRegression Ex1
// Console driver: builds the regression object, loads the data, solves, and
// prints the results.
class Program
{
static void Main(string[] args)
{
int nparm = 10;
double[] theta = new double[] {all, my, parameter, guesses, etc};
NonlinearRegression regression = new NonlinearRegression(nparm);
regression.Guess = theta; // Array of guess values came from above
// invtheta is not defined in this listing.
regression.Scale = invtheta; // Some array I derive from array theta
NonlinearRegressionEx1 fcn = new NonlinearRegressionEx1();
// ReadData() must run before Solve() so that block4Count and nobs are set.
fcn.ReadData();
double[] coef = regression.Solve(fcn);
// Report parameter estimates
Console.Out.WriteLine(all the results I want to read on screen);
// Waits for a line of console input before Main returns.
Console.ReadLine();
} // end Main
} // end class Program
} // end namespace Exploring




// THIS VERSION FIXES THAT UGLINESS. WHY DOES IT NOW TAKE 0.7 SECONDS TO SOLVE?
namespace Exploring
{
// Supplies the residual (f) and derivative callbacks that Main hands to
// NonlinearRegression.Solve(). Several names used below (allDataCount,
// block4Min, nobs, iend, someYDataArray, someXDataArray) are not declared
// anywhere in this listing; the bracketed English phrases are placeholders,
// not real C#.
public class NonlinearRegressionEx1 : Imsl.Stat.NonlinearRegression.IDerivative
{
// Number of points found to be in block4; incremented by ReadData().
public int block4Count = 0;
// JUST DECLARE THEM HERE, DON'T SET ASIDE SPACE YET, DO THAT 15 LINES BELOW
public double[] ydata;
public double[] xdata;
// Scans all allDataCount points, remembering the index where block4 starts
// (block4Min) and counting the points in it (block4Count), then stores that
// count in nobs. Note that block4Count is already final when this returns.
public void ReadData()
{
for (int i = 0; i < allDataCount; i++)
{
if (this point is the start of block4) block4Min = i;
if (this point is in block4) block4Count = block4Count + 1;
}
nobs = block4Count;
} // end method ReadData()
// Residual callback. For iobs < nobs it sets wt[0] and frq[0] to 1.0, writes
// the residual for observation iobs into e[0] and returns true; otherwise it
// returns false.
public bool f(double[] theta, int iobs, double[] frq, double[] wt, double[] e)
{
// NOW THAT I CAN ACCESS block4Count, SET ASIDE THE SPACE
// Two fresh arrays are allocated here on every call to f(), in addition to
// the two copies below. This per-call allocation is the only difference
// from the faster listing.
ydata = new double[block4Count];
xdata = new double[block4Count];
System.Array.Copy(someYDataArray, block4Min, ydata, 0, block4Count);
System.Array.Copy(someXDataArray, block4Min, xdata, 0, block4Count);
if (iobs < nobs)
{
wt[0] = 1.0;
frq[0] = 1.0;
iend = true;
e[0] = ydata[iobs] - (expression for my model);
}
else
{
iend = false;
}
return iend;
} // end method f()
// Derivative callback. Same control flow as f(): for iobs < nobs it sets
// wt[0] and frq[0] to 1.0, fills de[] with the derivative terms and returns
// true; otherwise it returns false. It does not touch xdata/ydata itself.
public bool derivative(double[] theta, int iobs, double[] frq, double[] wt, double[] de)
{
if (iobs < nobs)
{
wt[0] = 1.0;
frq[0] = 1.0;
iend = true;
de[0] = -(derivative of expression for my model);
de[1] = etc etc;
}
else
{
iend = false;
}
return iend;
} // end method derivative()
} // end class NonlinearRegression Ex1
// Console driver: builds the regression object, loads the data, solves, and
// prints the results.
class Program
{
static void Main(string[] args)
{
int nparm = 10;
double[] theta = new double[] {all, my, parameter, guesses, etc};
NonlinearRegression regression = new NonlinearRegression(nparm);
regression.Guess = theta; // Array of guess values came from above
// invtheta is not defined in this listing.
regression.Scale = invtheta; // Some array I derive from array theta
NonlinearRegressionEx1 fcn = new NonlinearRegressionEx1();
// ReadData() must run before Solve() so that block4Count and nobs are set.
fcn.ReadData();
double[] coef = regression.Solve(fcn);
// Report parameter estimates
Console.Out.WriteLine(all the results I want to read on screen);
// Waits for a line of console input before Main returns.
Console.ReadLine();
} // end Main
} // end class Program
} // end namespace Exploring

brian
06-13-2007, 05:52 PM
I believe the time difference is due to repeated memory allocation each time the regression class calls the user-supplied function. You may also want to try to avoid copying the data each time you enter this function.



// Reallocating memory on every call is expensive.
ydata = new double[block4Count];
xdata = new double[block4Count];
// If the data is kept in private member fields, these copies may not be
// necessary.
System.Array.Copy(someYDataArray, block4Min, ydata, 0, block4Count);
System.Array.Copy(someXDataArray, block4Min, xdata, 0, block4Count);


I have attached some code that uses a constructor to specify member data. Assuming you call the read-data function, the array size is variable and the memory is persistent (at least until the object goes out of scope).



using System;
using Imsl.Math;
using Imsl.Stat;

/// <summary>
/// Fits the model y = theta[0] * exp(theta[1] * x) to a set of observations
/// with <see cref="NonlinearRegression"/>. The observation arrays are filled
/// once, in the constructor, so <see cref="f"/> never allocates or copies.
/// </summary>
public class NonlinearRegressionEx1 : NonlinearRegression.IFunction
{
    // Observation data, assigned once by the constructor and only read by f().
    private double[] xdata;
    private double[] ydata;

    /// <summary>
    /// Creates the function object and populates the observation arrays.
    /// </summary>
    /// <param name="filename">
    /// Currently unused: the data is hard-coded. It is here so the arrays can
    /// later be read from a file instead (see the commented-out ReadData).
    /// </param>
    private NonlinearRegressionEx1(String filename)
    {
        ydata = new double[]{54.0, 50.0, 45.0, 37.0, 35.0, 25.0, 20.0, 16.0,
            18.0, 13.0, 8.0, 11.0, 8.0, 4.0, 6.0};
        xdata = new double[]{2.0, 5.0, 7.0, 10.0, 14.0, 19.0, 26.0, 31.0, 34.0,
            38.0, 45.0, 52.0, 53.0, 60.0, 65.0};
        // Read the file here instead of hard-coding, so that the data is
        // available to the methods f and derivative, e.g.:
        // ReadData(filename);
    }

    /* public void ReadData(String filename)
    {
    for (int i = 0; i < allDataCount; i++)
    {
    if (this point is the start of block4) block4Min = i;
    if (this point is in block4) block4Count = block4Count + 1;
    }
    nobs = block4Count;
    } // end method ReadData() */

    /// <summary>
    /// Evaluates the residual for a single observation.
    /// </summary>
    /// <param name="theta">Current parameter values; theta[0] and theta[1] are read.</param>
    /// <param name="iobs">Zero-based index of the observation to evaluate.</param>
    /// <param name="frq">Output; frq[0] is set to 1.0 when an observation is evaluated.</param>
    /// <param name="wt">Output; wt[0] is set to 1.0 when an observation is evaluated.</param>
    /// <param name="e">Output; e[0] receives ydata[iobs] minus the model value.</param>
    /// <returns>
    /// true when <paramref name="iobs"/> is below the number of observations
    /// and the outputs were written; false otherwise (outputs untouched).
    /// </returns>
    public bool f(double[] theta, int iobs, double[] frq, double[] wt, double[] e)
    {
        // Take the observation count from the data itself rather than a
        // hard-coded 15, so this stays correct once the arrays are read from
        // a file with a different number of points.
        int nobs = ydata.Length;

        if (iobs >= nobs)
        {
            return false;
        }

        wt[0] = 1.0;
        frq[0] = 1.0;
        e[0] = ydata[iobs] - theta[0] * Math.Exp(theta[1] * xdata[iobs]);
        return true;
    }

    /// <summary>
    /// Runs the two-parameter fit from an initial guess and prints the
    /// coefficients, rank, error degrees of freedom, SSE and the R matrix.
    /// </summary>
    public static void Main(String[] args)
    {
        int nparm = 2;
        double[] theta = new double[]{60.0, -0.03};
        NonlinearRegression regression = new NonlinearRegression(nparm);
        regression.Guess = theta;
        NonlinearRegression.IFunction fcn = new NonlinearRegressionEx1("file name");
        double[] coef = regression.Solve(fcn);

        Console.Out.WriteLine
            ("The computed regression coefficients are {" + coef[0] + ", "
            + coef[1] + "}");
        Console.Out.WriteLine("The computed rank is " + regression.Rank);
        Console.Out.WriteLine("The degrees of freedom for error are " +
            regression.DFError);
        Console.Out.WriteLine("The sums of squares for error is "
            + regression.GetSSE());
        new PrintMatrix("R from the QR decomposition ").Print(regression.R);
    }
}


Regards,

brian