I am running some experiments with SBP using this dataset:
Pagie.csv
and the following script:
import re

import numpy as np
from pyGPGOMEA import GPGOMEARegressor as GPG
def standardNotation(expr):
    """Rewrite a model expression string into Python-evaluable notation.

    The following rewrites are applied, in order:

    * variable names ``X<n>`` are lowercased to ``x<n>`` (any index, not
      only 0-2);
    * underscores are removed;
    * ``+-`` collapses to ``-`` and ``--`` collapses to ``+``;
    * ``^`` becomes the Python power operator ``**``;
    * a negative decimal constant that directly follows ``*`` or ``/`` is
      wrapped in parentheses, including an optional exponent part so that
      ``*-1.5e-05`` becomes ``*(-1.5e-05)`` rather than the invalid
      ``*(-1.5)e-05``.

    Args:
        expr: Expression string to normalise.

    Returns:
        The rewritten expression string.
    """
    # Lowercase every indexed variable in one pass instead of listing
    # X0, X1, X2 individually.
    expr = re.sub(r"X(\d+)", r"x\1", expr)
    expr = (
        expr.replace("_", "")
        .replace("+-", "-")
        .replace("--", "+")
        .replace("^", "**")
    )
    # Runs after the '^' -> '**' rewrite so that "x^-2.5" ends up as
    # "x**(-2.5)". The exponent group keeps scientific-notation constants
    # intact inside the parentheses.
    return re.sub(
        r"([*/])(-\d+\.\d+(?:[eE][+-]?\d+)?)",
        r"\1(\2)",
        expr,
    )
# Estimator settings, forwarded to the constructor unchanged.
gpg_settings = dict(
    popsize=500,
    generations=200,
    linearscaling=True,
    functions='+_-_*_div_log_exp',
    erc=True,
    initmaxtreeheight=6,
    maxtreeheight=20,
    maxsize=1000,
    subcross=0.0,
    sbagx=False,
    sbrdo=0.75,
    submut=0.25,
    unifdepthvar=True,
    tournament=4,
    sblibtype='p_10_9999_l_n',
    caching=False,
    gomea=False,
    ims=False,
    silent=True,
    parallel=False,
    seed=1,
)
est = GPG(**gpg_settings)

# Every column but the last is an input feature; the last column is the target.
z = np.loadtxt("Pagie.csv", delimiter=",")
x, y = z[:, :-1], z[:, -1]

# These names must stay exactly `x0` / `x1`: eval() below resolves the
# variables of the expression string against the module namespace.
x0, x1 = x[:, 0], x[:, 1]

est.fit(x, y)

# NOTE(review): `model` is not defined in this snippet; presumably it returns
# the fitted expression as a string -- confirm against the caller.
eq = standardNotation(model(est))

# eval() executes the model string as Python; only use it on trusted output.
yhat = eval(eq)
yhat2 = est.predict(x)

# Mean squared difference between evaluating the symbolic model and the
# output of the estimator's `predict` method.
squared_diff = np.square(yhat - yhat2)
print(squared_diff.mean())
I get a mean squared error of 5624673608570.937. As discussed, this is possibly due to truncation of the coefficient values.
I am running some experiments with SBP using this dataset:
Pagie.csv
and the following script:
I get a mean squared error of
5624673608570.937. As discussed, this is possibly due to truncation of the coefficient values.