diff --git a/is/kaggle/cv.py b/is/kaggle/cv.py index acb1d68..57a9417 100644 --- a/is/kaggle/cv.py +++ b/is/kaggle/cv.py @@ -33,7 +33,6 @@ train=data_train[i][data_train[i]['ID']==cleanData.iloc[j]['ID']] train['Weight']=float('NaN') data_train[i][data_train[i]['ID']==cleanData.iloc[j]['ID']]=train - print(data_train[i][data_train[i]['ID']==cleanData.iloc[j]['ID']]['Weight']) return (data_train,data_test) data_train, data_test=cv() @@ -46,15 +45,19 @@ test_value=np.float64(data_test[i].loc['Weight']) #no better idea... pred_value=predictions.iloc[int(data_test[i].loc['ID'])-1][predictedWeight] - print("test:",test_value) - print("pred:",pred_value) error+= (test_value - pred_value)**2 - print(error) return(np.sqrt(error/n)) #1st example +rmse=[] +sum=0 +n=0 for i in range(10): data_train[i]['WeightInter'] = data_train[i]['Weight'].interpolate() - print(data_train[i]) - print("RMSE(",i,"):",evaluate(data_train[i], data_test[i],'WeightInter')) + rmse.append(evaluate(data_train[i], data_test[i],'WeightInter')) + if(~np.isnan(rmse[i])): + n+=1 + sum+=rmse[i] + print("RMSE(",i,"):",rmse[i]) +print("Mean RSME:",sum/n)