Tryag File Manager
Home
||
Turbo Force
||
B-F Config_Cpanel
Current Path :
/
paip
/
script
/
weight
/
Or
Select Your Path :
Upload File :
New :
File
Dir
//paip/script/weight/housePixelWeightRegressor.py
"""Fit quadratic pixel-to-weight regressors and pickle them.

Reads recent rows from ``tbl_pixel_stats`` and ``tbl_weight_stats``,
aggregates them per day and ``house_id``, and fits a degree-2 polynomial
regression from ``meanpixel`` to a reference weight: one model over all
houses and one model per house.  Each fitted ``LinearRegression`` is pickled
under ``<HOME_PATH>/weight/model``.
"""
import os
import sys
import pickle
from datetime import date, timedelta

import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import scipy.stats as sss

# set sys path to import PyDBconnector
HOME_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
DATA_DIR = 'data'
MODULE_DIR = 'util'
OUTPUT_DIR = 'out'
sys.path.append(os.path.join(HOME_PATH, MODULE_DIR))
from PyDBconnector import PyDBconnector

import warnings
warnings.filterwarnings('ignore')

# Number of days of history used for training.
LOOKBACK_DAYS = 7
# Fraction trimmed from each tail before averaging per day/house.
TRIM_FRACTION = 0.2
# Directory receiving the pickled regressors.
MODEL_DIR = os.path.join(HOME_PATH, 'weight', 'model')


def _fit_and_save(samples, model_path):
    """Fit a degree-2 regression of ``useWeight`` on ``meanpixel`` and pickle it.

    Args:
        samples: DataFrame with exactly the columns ``meanpixel`` and
            ``useWeight``, free of NaN.
        model_path: Destination file for the pickled ``LinearRegression``.

    Note:
        Only the ``LinearRegression`` is pickled; the polynomial expansion is
        not, so whoever loads the model has to apply the same degree-2,
        no-bias expansion before predicting.
    """
    poly_feature = PolynomialFeatures(degree=2, include_bias=False)
    # Keyword form: the positional ``axis`` argument of ``drop`` is rejected
    # by pandas 2.x.
    expanded = poly_feature.fit_transform(samples.drop(columns='useWeight'))
    regressor = LinearRegression()
    regressor.fit(expanded, samples['useWeight'])
    with open(model_path, 'wb') as f:
        pickle.dump(regressor, f)


if __name__ == '__main__':
    dbconn = PyDBconnector()

    since_date = date.today() - timedelta(LOOKBACK_DAYS)
    # The date must be a quoted literal; unquoted, ``2024-01-08`` is evaluated
    # by the database as integer arithmetic and the filter is meaningless.
    # NOTE(review): assumes the backend accepts an ISO 'YYYY-MM-DD' string in
    # a comparison with create_time -- confirm for the target database.
    sqlPixel = f'''select create_time, house_id, module_id, meanpixel, realweight
                    from tbl_pixel_stats a
                    where a.create_time >= (select max(in_date) from tbl_house_breed_hist )
                    and a.create_time >= '{since_date}'
                    '''
    sqlWeight = f'''select create_time, house_id, module_id, medianweight
                    from tbl_weight_stats a
                    where a.create_time >= (select max(in_date) from tbl_house_breed_hist )
                    and a.create_time >= '{since_date}'
                    '''

    try:
        pixel_df = dbconn.select_from_db(sqlPixel)
        weight_df = dbconn.select_from_db(sqlWeight)
    except Exception as err:
        print("Exception :: tbl_pixel_stats a, tbl_weight_stats b !!!")
        # Still an IOError for callers, but keep the underlying cause attached.
        raise IOError('failed to read tbl_pixel_stats / tbl_weight_stats') from err

    # pixel: normalise column names and dtypes, then aggregate per day/house
    pixel_df.columns = [str(x).lower() for x in pixel_df.columns]
    pixel_df['create_time'] = pd.to_datetime(pixel_df.create_time, format='%Y-%m-%d %H:%M:%S')
    pixel_df['house_id'] = pixel_df.house_id.astype(str)
    pixel_df['module_id'] = pixel_df.module_id.astype(str)
    pixel_df['meanpixel'] = pixel_df.meanpixel.astype(float)
    pixel_df['realweight'] = pixel_df.realweight.astype(float)
    pixel_gp = pixel_df.groupby(
        [pd.Grouper(key='create_time', freq='1D'), 'house_id']
    )[['meanpixel', 'realweight']].agg(
        {'meanpixel': lambda x: sss.trim_mean(x, TRIM_FRACTION), 'realweight': 'mean'}
    )

    # weight: same normalisation, trimmed mean of the median weights
    weight_df.columns = [str(x).lower() for x in weight_df.columns]
    weight_df['create_time'] = pd.to_datetime(weight_df.create_time, format='%Y-%m-%d %H:%M:%S')
    weight_df['house_id'] = weight_df.house_id.astype(str)
    weight_df['module_id'] = weight_df.module_id.astype(str)
    weight_df['medianweight'] = weight_df.medianweight.astype(float)
    weight_gp = weight_df.groupby(
        [pd.Grouper(key='create_time', freq='1D'), 'house_id']
    )[['medianweight']].agg(lambda x: sss.trim_mean(x, TRIM_FRACTION))

    ## merge results
    test_df = pd.merge(pixel_gp, weight_gp, left_index=True, right_index=True, how='outer')
    # Both frames are indexed by (create_time, house_id); turn the index back
    # into columns so house_id can be filtered on below.
    test_df = test_df.reset_index()
    test_df = test_df[test_df.meanpixel > 0].copy()

    # Reference weight: when a positive median weight exists use the larger of
    # median and real weight (a missing real weight is ignored), otherwise
    # fall back to the real weight.
    larger_weight = test_df[['medianweight', 'realweight']].max(axis=1)
    test_df['useWeight'] = larger_weight.where(test_df.medianweight > 0, test_df.realweight)

    # The regression cannot be fitted on NaN targets.
    test_df = test_df.dropna(subset=['meanpixel', 'useWeight'])
    if test_df.empty:
        raise ValueError(f'no usable pixel/weight rows since {since_date}')

    os.makedirs(MODEL_DIR, exist_ok=True)

    ## overall model (all houses together)
    _fit_and_save(
        test_df[['meanpixel', 'useWeight']].copy(),
        os.path.join(MODEL_DIR, 'pixel_weight_regressor.pkl'),
    )

    # per-house models
    # update regressor
    for house_id in test_df.house_id.unique():
        house_samples = test_df[test_df.house_id == house_id][['meanpixel', 'useWeight']].copy()
        _fit_and_save(
            house_samples,
            os.path.join(MODEL_DIR, 'pixel_weight_' + str(house_id).strip() + '_regressor.pkl'),
        )