# NOTE(review): the lines below are non-code residue from the web file-manager
# page this script was captured from; commented out so the file parses as Python.
# Tryag File Manager
# Home
# ||
# Turbo Force
# ||
# B-F Config_Cpanel
# Current Path :
# /
# paip
# /
# script
# /
# weight
# /
# Or
# Select Your Path :
# Upload File :
# New :
# File
# Dir
# //paip/script/weight/pixelHourStatGenerator.py
# ---------------------------------------------------------------------------
# pixelHourStatGenerator
#
# Pulls recent per-image pixel statistics from the DB, clusters each module's
# per-minute pixel trace with DBSCAN to separate the dense "in" cluster from
# noise, summarises the result per 2-hour window, predicts weight from the
# mean pixel value with pickled regression models, and inserts the rows into
# tbl_pixel_stats.
# ---------------------------------------------------------------------------
import ast
import os
import pickle
import sys
import warnings
from datetime import date, timedelta

import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.linear_model import LinearRegression  # noqa: F401 (kept from original)
from sklearn.preprocessing import PolynomialFeatures

# set sys path to import PyDBconnector
HOME_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
DATA_DIR = 'data'
MODULE_DIR = 'util'
OUTPUT_DIR = 'out'
insertDB = 1  # > 0 : write the computed stats back to the DB

sys.path.append(os.path.join(HOME_PATH, MODULE_DIR))
from PyDBconnector import PyDBconnector  # noqa: E402

warnings.filterwarnings('ignore')


def clustering_pixel(pixel_from_db_df, inTer: int = 2, dbscan_esp: float = 0.075,
                     plotInner: bool = False, plotOuter: bool = True) -> pd.DataFrame:
    """Summarise each module's pixel trace per ``inTer``-hour window.

    For every ``module_id`` the per-minute pixel means are grouped into
    ``inTer``-hour windows; inside each window DBSCAN — run on
    (minute-index/60, log10(pixel mean)) — separates dense inliers from
    noise (label -1), and the window is summarised by the largest cluster's
    mean pixel value and the total inlier count.

    Parameters
    ----------
    pixel_from_db_df : DataFrame with columns ``create_time``, ``house_id``,
        ``module_id`` and ``predPixelMeanR`` (a list of per-minute pixel means).
    inTer : grouping interval in hours (should divide 24; used for ``seqNum``).
    dbscan_esp : DBSCAN ``eps`` in the scaled (index, logMean) space.
        NOTE: the original annotated this as ``int`` but defaults to 0.075.
    plotInner, plotOuter : kept for interface compatibility; the plotting
        code was removed (it was fully commented out), so these are no-ops.

    Returns
    -------
    DataFrame with ``create_time``, ``meanPixel``, ``pixelCount``, ``seqNum``,
    ``module_id``, ``house_id`` — or ``None`` if the input has no modules.
    """
    intervals = str(inTer) + 'h'
    return_df = None
    print(f" unique module_id : {pixel_from_db_df.module_id.unique()}")
    for moduleId in sorted(pixel_from_db_df.module_id.unique()):
        print(f"ing...{moduleId}")
        module_df = pixel_from_db_df[pixel_from_db_df.module_id == moduleId]
        house_id = module_df['house_id'].iloc[0]
        # One row per inTer-hour window; summing concatenates the
        # predPixelMeanR lists of every image inside the window.
        plot_df = module_df.groupby(
            pd.Grouper(key='create_time', freq=intervals))[['predPixelMeanR']].sum()
        result_df = pd.DataFrame(columns=['meanPixel', 'pixelCount'], index=plot_df.index)
        plot_df = plot_df.reset_index()
        for idx, pdRow in plot_df.dropna().iterrows():
            # An empty window sums to int 0 -> nothing to cluster.
            if isinstance(pdRow.predPixelMeanR, int):
                continue
            meanr = pd.DataFrame(pdRow.predPixelMeanR, columns=['meanr']).reset_index()
            meanr['index'] = meanr['index'] / 60  # minute index -> hours
            meanr['logMean'] = np.log10(meanr['meanr'])
            model = DBSCAN(eps=dbscan_esp, min_samples=5, n_jobs=-1)
            meanr['predict'] = model.fit_predict(meanr[['index', 'logMean']])
            # Per-cluster mean pixel value / inlier count (label -1 is noise).
            dicts = meanr[meanr.predict != -1].groupby(['predict'])['meanr'].mean().to_dict()
            dicts2 = meanr[meanr.predict != -1].groupby(['predict'])['logMean'].count().to_dict()
            if len(meanr.predict.unique()) == 1:
                # Only a single label (e.g. everything noise): skip the window.
                continue
            meanr['inOut'] = [('out' if x == -1 else 'in') for x in meanr.predict]
            # Window summary: largest cluster mean, total inlier sample count.
            result_df.loc[pdRow.create_time] = [round(max(dicts.values()), 1),
                                                np.sum(list(dicts2.values()))]
        result_df.reset_index(inplace=True)
        # Fractional day number of the window: rank / windows-per-day + 1.
        result_df['seqNum'] = np.round(result_df.create_time.rank() / (24 // inTer) + 1., 2)
        result_df.dropna(inplace=True)
        result_df['meanPixel'] = result_df.meanPixel.astype(float)
        result_df['pixelCount'] = result_df.pixelCount.astype(int)
        result_df['module_id'] = [moduleId] * len(result_df)
        result_df['house_id'] = [house_id] * len(result_df)
        if return_df is None:
            return_df = pd.DataFrame(columns=result_df.columns)
        return_df = pd.concat([return_df, result_df])
    return return_df


def _build_real_weight_df() -> pd.DataFrame:
    """Return the reference weight curve interpolated onto an hourly dayAge grid."""
    # Reference weight gain per day of age (day 0.5, 1.5, ... 32.5).
    weight_default = [42, 57, 72, 89, 109, 131, 156, 185, 216, 251, 289, 330,
                      375, 423, 474, 529, 587, 648, 713, 780, 850, 923, 998,
                      1076, 1156, 1238, 1322, 1408, 1495, 1584, 1674, 1764, 1856]
    real_weight_df = pd.DataFrame(
        {"dayAge": [x + 0.5 for x in range(len(weight_default))],
         "realWeight": weight_default})
    real_weight_df['logWeight'] = np.log10(real_weight_df.realWeight)
    # Re-index onto an hourly grid covering 40 days, then interpolate
    # linearly between the daily reference points.
    real_weight_df = real_weight_df.set_index('dayAge').reindex(
        [round((x - 1) / 24 + 0.5, 2) for x in range(1, 24 * 40)]).reset_index()
    real_weight_df['realWeight'] = real_weight_df.realWeight.interpolate()
    return real_weight_df


def _predict_for_house(test_df: pd.DataFrame, house_id: str) -> None:
    """Fill ``predictedWeight`` for one house from the first usable model.

    Tries, in order: the house-specific degree-2 regressor, the shared
    degree-2 regressor, then the 'chery' degree-1 linear model (whose
    prediction is scaled by 0.65, as in the original code). Re-raises the
    last error if no model could be loaded and applied.
    """
    pixels = test_df[test_df.house_id == house_id][['meanPixel']]
    # (pickle path, polynomial degree, output scale factor)
    candidates = [
        ('./model/pixel_weight_' + house_id + '_regressor.pkl', 2, 1.0),
        ('./model/pixel_weight_regressor.pkl', 2, 1.0),
        ('./model/chery_linear_model.pkl', 1, 0.65),
    ]
    last_err = None
    for path, degree, scale in candidates:
        try:
            # NOTE(security): pickle.load executes arbitrary code on load;
            # only load model files from a trusted location.
            with open(path, 'rb') as f:
                regModel = pickle.load(f)
            poly_feature = PolynomialFeatures(degree=degree, include_bias=False)
            A_poly = poly_feature.fit_transform(pixels)
            test_df.loc[test_df.house_id == house_id, 'predictedWeight'] = \
                np.round(regModel.predict(A_poly) * scale, 1)
            return
        except Exception as err:  # model missing/unreadable -> try the next one
            last_err = err
    raise last_err


def _insert_results(test_df: pd.DataFrame) -> None:
    """Insert one row per window into tbl_pixel_stats."""
    try:
        dbConn = PyDBconnector()
        print('insert results===========================')
        for pdRow in test_df.iterrows():
            # NOTE(security): values are interpolated directly into the SQL
            # string; switch to parameterized queries if PyDBconnector
            # supports them.
            insert_string = f"insert into tbl_pixel_stats(create_time, house_id, module_id, cycleNum, meanPixel, pixelCount, dayAge, realWeight, predictedWeight) values('{pdRow[1]['create_time']}','{pdRow[1]['house_id']}','{pdRow[1]['module_id']}',{pdRow[1]['cycleNum']},{pdRow[1]['meanPixel']},{pdRow[1]['pixelCount']},{pdRow[1]['dayAge']},{pdRow[1]['realWeight']},{pdRow[1]['predictedWeight']})"
            dbConn.insert_to_db(insert_string)
        dbConn.close()
    except Exception as err:
        # Keep the original message but preserve the root cause.
        raise Exception('insert query error! check DB') from err


def main() -> None:
    """Fetch recent pixel data, cluster it, predict weights, store the stats."""
    dbconn = PyDBconnector()
    yesterday = date.today() - timedelta(1)  # noqa: F841  kept from original
    # yesterday='2022-01-08'
    # Fetch only the recent hourly window (original note: "-4 ~ -2 hours";
    # the SQL below actually selects the -2h .. 0h window).
    image_str = '''select CREATE_TIME,HOUSE_ID,MODULE_ID,WEIGHT_PREDICTION_COUNT,WEIGHT_PREDICTION_PIXEL_MEAN from tbl_image_analysis_weight where 1=1 and `WEIGHT_PREDICTION_STATUS` != 'fail' and TIMESTAMPDIFF(HOUR,date_format(now(), '%Y-%m-%d %H'),create_time) >= -2 and TIMESTAMPDIFF(HOUR,date_format(now(), '%Y-%m-%d %H'),create_time) < 0; '''
    inDate_str = '''select max(house_id) as house_id, max(in_date) as in_date, count(*) as cnt from tbl_house_breed_hist group by house_id'''

    # read data from db
    pixel_from_db_df = dbconn.select_from_db(image_str)
    inDate_df = dbconn.select_from_db(inDate_str)

    # columns to lower case
    pixel_from_db_df.columns = [str(x).lower() for x in pixel_from_db_df.columns]
    inDate_df.columns = [str(x).lower() for x in inDate_df.columns]
    pixel_from_db_df.columns = ['create_time', 'house_id', 'module_id', 'predCountR', 'predPixelMeanR']

    # image df
    pixel_from_db_df.dropna(inplace=True)
    pixel_from_db_df['create_time'] = pd.to_datetime(
        pixel_from_db_df.create_time, format='%Y-%m-%d %H:%M:%S')
    pixel_from_db_df['house_id'] = pixel_from_db_df.house_id.astype(str)
    pixel_from_db_df['module_id'] = pixel_from_db_df.module_id.astype(str)
    # The column stores the per-minute pixel means as a Python-literal string.
    # ast.literal_eval parses it without eval()'s arbitrary-code-execution risk.
    pixel_from_db_df['predPixelMeanR'] = [
        ast.literal_eval(x) for x in pixel_from_db_df.predPixelMeanR]

    # weight df: reference weight gain by day of age
    real_weight_df = _build_real_weight_df()

    # dense based clustering
    result_df = clustering_pixel(pixel_from_db_df, inTer=2, plotInner=False, plotOuter=False)
    print(result_df)
    print('-----')
    print(inDate_df)

    # update date time ... using database tbl_house_breed_hist
    for house_id in inDate_df.house_id.unique():
        house_hist = inDate_df[inDate_df.house_id == house_id]
        # .iloc[0] extracts scalars; the original used the 1-element Series
        # directly, which leaked Series objects into the dayAge/cycleNum
        # cells (and hence into the SQL insert string).
        cycleNum = house_hist['cnt'].iloc[0]
        in_date = pd.to_datetime(house_hist['in_date'].iloc[0])
        print(f"cycleNum : {cycleNum}")
        mask = result_df.house_id == house_id
        result_df.loc[mask, 'dayAge'] = [
            round((x - in_date) / np.timedelta64(1, 'h') / 24., 2)
            for x in result_df[mask].create_time]
        result_df.loc[mask, 'cycleNum'] = [cycleNum] * len(result_df[mask])

    # only for sansu
    # result_df = result_df[result_df.module_id != 'CT03,8']  # camera angle changed.. excluded.

    # Join measured pixels with the reference weight at the same dayAge.
    test_df = pd.merge(left=result_df, right=real_weight_df[['dayAge', 'realWeight']],
                       how='outer', on='dayAge').dropna()
    test_df['realWeight'] = np.round(test_df.realWeight, 1)

    # pixel_weight_H01_regressor.pkl etc.
    for house_id in test_df.house_id.unique():
        _predict_for_house(test_df, house_id)
    print(test_df)

    # DB insert
    if insertDB > 0:
        _insert_results(test_df)


if __name__ == '__main__':
    main()