# Post-change content of the hunk `@@ -49,15 +49,34 @@` of is/kaggle/cv.py.
# The hunk's first context line, `return(np.sqrt(error/n))`, is the tail of a
# function that starts before this section and is not repeated here.

#1st example
def interpol(data):
    """Return the 'Weight' column of *data* with NaN gaps interpolated.

    Uses the default settings of the column's ``interpolate()`` method.
    *data* itself is not modified.
    """
    return data['Weight'].interpolate()


def calorieBased(data):
    """Predict missing 'Weight' values from the recent calorie intake.

    Missing 'Calories' entries are replaced by random draws from a normal
    distribution with the mean and standard deviation of the observed
    calories.  Each missing weight is then estimated as the previous weight
    plus a tenth of the difference between the mean calories of the (up to)
    five preceding rows and the overall calorie mean.  A missing first weight
    is seeded with the mean of the observed weights.

    Observed weights are kept as they are and *data* is not modified.

    Args:
        data: table with a 'Weight' and a 'Calories' column; rows are
            processed in their positional order.

    Returns:
        A float copy of the 'Weight' column (same index) with the missing
        values filled in.
    """
    # Work on plain float arrays: positional access is independent of the
    # frame's index labels and avoids chained assignment on the frame.
    calories = data['Calories'].to_numpy(dtype=float, copy=True)
    weights = data['Weight'].to_numpy(dtype=float, copy=True)

    calMean = data['Calories'].mean()
    calSTD = data['Calories'].std()
    if np.isnan(calSTD):
        # Fewer than two observed calorie values: no spread to sample from.
        calSTD = 0.0

    missing_cal = np.isnan(calories)
    if np.isnan(calMean):
        # No calorie information at all: use a zero deviation everywhere so
        # that missing weights simply carry the previous weight forward.
        calMean = 0.0
        calories[:] = 0.0
    elif missing_cal.any():
        # fill with random data for nan-values (one draw per missing entry)
        calories[missing_cal] = np.random.normal(
            loc=calMean, scale=calSTD, size=int(missing_cal.sum())
        )

    for i in range(len(weights)):
        if not np.isnan(weights[i]):
            continue
        if i == 0:
            # Nothing precedes the first row; start from the average weight.
            weights[0] = data['Weight'].mean()
            continue
        # Clamp the window start so the first rows do not produce a negative
        # slice start (which would select the wrong rows or none at all).
        window = calories[max(0, i - 5):i]
        weights[i] = weights[i - 1] + (window.mean() - calMean) / 10

    predicted = data['Weight'].astype(float)
    predicted.iloc[:] = weights
    return predicted


# Evaluate plain interpolation on the ten train/test splits.
rmse = []
# NOTE: `sum` shadows the builtin from here on; the name is kept so that any
# later code in this file that reads it keeps working.
sum = 0
n = 0
for i in range(10):
    data_train[i]['predWeight'] = interpol(data_train[i])
    rmse.append(evaluate(data_train[i], data_test[i]))
    # Only folds with a defined RMSE contribute to the mean.
    if not np.isnan(rmse[i]):
        n += 1
        sum += rmse[i]
    print("RMSE(",i,"):",rmse[i])
# Guard against every fold having an undefined RMSE (n == 0).
print("Mean RSME:", sum / n if n else float('nan'))

calorieBased(data_train[1])