Tryag File Manager
Home
||
Turbo Force
||
B-F Config_Cpanel
Current Path :
/
paip
/
script
/
weight
/
Or
Select Your Path :
Upload File :
New :
File
Dir
//paip/script/weight/PredictWeightTrend.py
'''
@date    : 2022-03-25
@author  : 전규빈
@content : Generate the predicted weight trend for each poultry house
           (무게 트렌드 생성): load raw image-pixel data and measured weights
           from the DB, remove outliers, fit an XGBoost pixel->weight model,
           smooth the result and (optionally) upload it back to the DB.
'''
import os.path
import sys
import time
import pandas as pd
import numpy as np
import glob, warnings
from datetime import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import explained_variance_score
import joblib
import xgboost
import pickle
from sklearn.pipeline import Pipeline
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.base import JobLookupError

HOME_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
DATA_DIR = 'data'
MODULE_DIR = 'util'
OUTPUT_DIR = 'out'
sys.path.append(os.path.join(HOME_PATH, MODULE_DIR))
from PyDBconnector import PyDBconnector

warnings.filterwarnings('ignore')


class WeightPredict():
    # NOTE(review): this hand-rolled singleton pair is unused by the __main__
    # driver below (it instantiates the class directly), and the classmethod
    # named `instance` shadows the class attribute of the same name.  Kept
    # unchanged for backward compatibility with any external callers.
    instance = None

    @classmethod
    def _getInstance(cls):
        return cls._instance

    @classmethod
    def instance(cls, *args, **kargs):
        cls._instance = cls(*args, **kargs)
        cls.instance = cls._getInstance
        return cls._instance

    def __init__(self):
        # Opens the DB connection and immediately runs the full pipeline.
        self._dbConn = PyDBconnector('192.100.0.11')
        self.load_data()

    def change_raw_pixel(self, dataframe):
        '''
        Explode the 'WEIGHT_PREDICTION_PIXEL_MEAN' string column (e.g.
        "[12, 34, ...]") into one row per pixel value, stored in a new
        'pixel' column; all other columns are repeated accordingly.

        :param dataframe: frame containing 'WEIGHT_PREDICTION_PIXEL_MEAN'
        :return: exploded raw-pixel frame
        '''
        def remove_str(value):
            # "[1, 2]" -> [1, 2]; an empty string yields an empty list
            # (matching the original guard `if x != ""`).
            value = value.replace('[', "").replace(']', "")
            if value == "":
                return []
            return [int(token) for token in value.split(",")]

        def using_repeat(df, column_name):
            other_cols = [c for c in df.columns if c != column_name]
            lens = [len(item) for item in df[column_name]]
            print(other_cols)
            # Generalized over all remaining columns; the original hard-coded
            # indices [0]..[5] and broke for any other column count.
            data = {col: np.repeat(df[col].values, lens) for col in other_cols}
            data[column_name] = np.concatenate(df[column_name].values)
            return pd.DataFrame(data)

        dataframe['pixel'] = dataframe['WEIGHT_PREDICTION_PIXEL_MEAN'].apply(remove_str)
        result_df_raw = using_repeat(dataframe, 'pixel')
        return result_df_raw

    # DBSCAN (alternative outlier detector; currently unused in outlier_remove)
    def dbscan(self, dataframe):
        '''
        Label each row's 'pixel' value with a DBSCAN cluster id (-1 = noise).

        :param dataframe: one day's worth of raw pixel rows
        :return: same frame with a 'cluster' column added
        '''
        scaler = StandardScaler()
        df_scale = pd.DataFrame(scaler.fit_transform(pd.DataFrame(dataframe['pixel'])))
        # fit_predict both fits and labels in one pass; the original fit the
        # same model three times (fit in the constructor chain, fit again,
        # then fit_predict).
        model = DBSCAN(eps=1.2, min_samples=100)
        dataframe['cluster'] = model.fit_predict(df_scale)
        return dataframe

    def tukey_outlier(self, dataframe):
        '''
        Flag rows whose 'pixel' value lies inside a Tukey-style fence;
        the 'cluster' column is True for inliers, False for outliers.

        :param dataframe: one day's worth of raw pixel rows
        :return: same frame with a boolean 'cluster' column added
        '''
        Q1 = np.percentile(dataframe['pixel'], 35)
        Q3 = np.percentile(dataframe['pixel'], 95)
        IQR = Q3 - Q1
        lower = Q1 - 1.5 * IQR
        # NOTE(review): classic Tukey uses Q3 + 1.5*IQR for the upper fence;
        # the Q1-based upper bound (and the 35/95 percentiles) look like
        # deliberate tuning for this data, so they are preserved — confirm
        # with the author.
        upper = Q1 + 1.5 * IQR
        condition = (dataframe['pixel'] >= lower) & (dataframe['pixel'] <= upper)
        dataframe['cluster'] = condition
        return dataframe

    # Outlier detection and removal (tukey_outlier applied per day)
    def outlier_remove(self, dataframe, plot=False):
        '''
        Split the raw pixel frame into daily windows, flag outliers per day,
        drop them, and compute a 6-hourly median pixel series.

        :param dataframe: exploded raw pixel frame (default RangeIndex expected)
        :param plot: when True, scatter-plot normal vs outlier points
        :return: (outlier-free raw frame, 6h median 'pixel' frame)
        '''
        result_df = pd.DataFrame()
        start_date = dataframe['CREATE_TIME'].iloc[0].strftime('%Y-%m-%d')
        end_date = dataframe['CREATE_TIME'].iloc[-1].strftime('%Y-%m-%d')
        date_range = pd.date_range(start=start_date, end=end_date)

        # Flag outliers one calendar day at a time.
        for i in range(len(date_range)):
            if i == len(date_range) - 1:
                condition = (dataframe['CREATE_TIME'] >= date_range[i])
            else:
                # Half-open window [day, next day).  The original used an
                # inclusive upper bound, which duplicated any row falling
                # exactly on a midnight boundary into two windows.
                condition = ((dataframe['CREATE_TIME'] >= date_range[i])
                             & (dataframe['CREATE_TIME'] < date_range[i + 1]))
            # result = self.dbscan(dataframe[condition])
            result = self.tukey_outlier(dataframe[condition])
            result_df = pd.concat((result_df, result), sort=False)

        condition = (result_df["cluster"] == True)
        if plot:
            plt.figure(figsize=(20, 10))
            plt.title(f' Raw Pixel Outlier ')
            plt.scatter(x=result_df[condition]['CREATE_TIME'], y=result_df[condition]['pixel'],
                        color='blue', label='normal')
            plt.scatter(x=result_df[~(condition)]['CREATE_TIME'], y=result_df[~(condition)]['pixel'],
                        color='red', label='outlier')
            plt.legend(loc='best', ncol=3)
            plt.show()

        outlier_remove_raw = result_df[condition]
        # 6-hourly median of the surviving pixel values.
        medianPixel = outlier_remove_raw.groupby(pd.Grouper(key='CREATE_TIME', freq='6h')).median()['pixel']
        medianPixel = medianPixel.reset_index()
        return outlier_remove_raw, medianPixel

    def model_train(self, dataframe_pixel, train_data):
        '''
        Fit an XGBoost regressor mapping (quadratic-polynomial) pixel features
        to the measured median weight, predict a weight for every raw pixel
        row, then smooth the 6h-grouped prediction with a quadratic
        least-squares regression over the bucket index.

        :param dataframe_pixel: outlier-free raw pixel frame ('CREATE_TIME', 'pixel')
        :param train_data: merged frame with 'pixel' and 'medianWeight'
        :return: 6h-grouped frame with 'PredictWeight' and 'linear predict'
        '''
        x_train = pd.DataFrame(train_data['pixel'])
        y_train = train_data['medianWeight']
        # FIX: the original passed the misspelled kwargs 'mild_child_weight'
        # and 'labmda', which are not recognized XGBoost parameters, so the
        # intended values never took effect; corrected to
        # min_child_weight / reg_lambda.
        xgb_model = xgboost.XGBRegressor(booster='gblinear', n_estimators=1500, learning_rate=0.5,
                                         gamma=0, subsample=1, eta=1, min_child_weight=0.3,
                                         colsample_bytree=1, max_depth=0, reg_lambda=0)

        poly_features = PolynomialFeatures(degree=2, include_bias=False)
        A_poly_train = poly_features.fit_transform(x_train)
        xgb_model.fit(A_poly_train, y_train)

        # Predict a weight for every raw pixel sample (transform is
        # equivalent to the original fit_transform here, without refitting).
        A_poly_test = poly_features.transform(pd.DataFrame(dataframe_pixel['pixel']).astype('int64'))
        dataframe_pixel['PredictWeight'] = xgb_model.predict(A_poly_test)

        # Group per-sample predictions into 6h buckets and interpolate gaps.
        dataframe_pixel_grouped = dataframe_pixel.groupby(pd.Grouper(key='CREATE_TIME', freq='6h')).mean()
        dataframe_pixel_grouped = dataframe_pixel_grouped.interpolate()
        dataframe_pixel_grouped_index = dataframe_pixel_grouped.reset_index()

        # Regression-to-the-mean smoothing: quadratic fit over bucket index.
        x_train = pd.DataFrame(dataframe_pixel_grouped_index.index)
        y_train = dataframe_pixel_grouped['PredictWeight']
        poly_features = PolynomialFeatures(degree=2, include_bias=False)
        A_poly_train = poly_features.fit_transform(x_train)
        lin_reg = LinearRegression()
        lin_reg.fit(A_poly_train, y_train)
        # Predicting on the training design matrix itself (same index range).
        dataframe_pixel_grouped['linear predict'] = lin_reg.predict(A_poly_train)
        return dataframe_pixel_grouped

    def load_data(self):
        '''
        Main pipeline.  For every house with an active (or date-matching)
        breeding cycle: load raw pixel and measured weight data, remove
        outliers, train the weight model, attach day-age/cycle metadata and
        print the resulting trend (DB upload currently disabled).

        :raises Exception: when tbl_house_breed_hist has no applicable cycle
        '''
        total_start_time = time.time()

        # Reference "now" used to pick the breeding cycle to update.
        update_date = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
        update_date = pd.to_datetime(update_date)
        # update_date = pd.to_datetime('2021-12-25')  # manual override for backfills

        # Day-age (breeding history) data.
        dayAge_sql_str = f"select HOUSE_ID, IN_DATE, OUT_DATE, DAYS_AFTER_BIRTH from tbl_house_breed_hist ORDER BY IN_DATE , HOUSE_ID"
        house_breed_hist_total = self._dbConn.select_from_db(dayAge_sql_str)

        # Closed cycles that contain the update date.
        house_breed_hist = house_breed_hist_total[
            (update_date >= house_breed_hist_total['IN_DATE'])
            & (update_date <= house_breed_hist_total['OUT_DATE'])]

        no_out_date = False
        if len(house_breed_hist) == 0:
            # No closed cycle covers today: fall back to open cycles
            # (IN_DATE set, OUT_DATE still null).
            house_breed_hist = house_breed_hist_total[house_breed_hist_total['OUT_DATE'].isnull()]
            if house_breed_hist.iloc[0]['IN_DATE'] <= update_date:
                no_out_date = True
            else:
                # No applicable cycle at all.
                raise Exception("Not found tbl_house_breed_hist data")
        print(house_breed_hist)

        house_id_list = sorted(set(house_breed_hist['HOUSE_ID']))
        for house_idx in house_id_list:
            start_time = time.time()
            print(f"-------------------------------{house_idx} -------------------------------")
            in_date = house_breed_hist[house_breed_hist['HOUSE_ID'] == house_idx]['IN_DATE'].iloc[0]
            # NOTE(review): SQL is built by f-string interpolation.  The values
            # come from our own DB, but parameterized queries would be safer
            # if PyDBconnector supports them.
            if no_out_date:
                # Open cycle: take everything from the stocking date onward.
                print(in_date)
                pixel_sql_str = f"select CREATE_TIME, HOUSE_ID, MODULE_ID, WEIGHT_PREDICTION_PIXEL_MEAN from tbl_image_analysis_weight where MODULE_ID LIKE 'CT%' and HOUSE_ID = '{house_idx}' and CREATE_TIME >= '{in_date}'"
                weight_sql_str = f"select CREATE_TIME, medianWeight, medianPixel, HOUSE_ID from tbl_weight_stats where HOUSE_ID = '{house_idx}'and CREATE_TIME >= '{in_date}'"
            else:
                # Closed cycle: restrict to [stocking date, shipping date].
                out_date = house_breed_hist[house_breed_hist['HOUSE_ID'] == house_idx]['OUT_DATE'].iloc[0]
                pixel_sql_str = f"select CREATE_TIME, HOUSE_ID, MODULE_ID, WEIGHT_PREDICTION_PIXEL_MEAN from tbl_image_analysis_weight where HOUSE_ID = '{house_idx}' and CREATE_TIME between '{in_date}' and '{out_date}'"
                weight_sql_str = f"select CREATE_TIME, medianWeight, medianPixel, HOUSE_ID from tbl_weight_stats where HOUSE_ID = '{house_idx}'and CREATE_TIME between '{in_date}' and '{out_date}'"

            pixel_db_data = self._dbConn.select_from_db(pixel_sql_str)
            pixel_db_data = pixel_db_data.sort_values(by=["CREATE_TIME"], ascending=[True]).reset_index(drop=True)
            pixel_db_data = pixel_db_data[pixel_db_data['WEIGHT_PREDICTION_PIXEL_MEAN'].notnull()]
            weight_db_data = self._dbConn.select_from_db(weight_sql_str)

            if len(pixel_db_data) == 0:
                print(f"{house_idx} No Pixel data")
                continue
            if len(weight_db_data) == 0:
                print(f"{house_idx} No Weight data")
                continue

            farm_pixel = pixel_db_data[pixel_db_data['HOUSE_ID'] == house_idx].reset_index(drop=True)
            farm_weight = weight_db_data[weight_db_data['HOUSE_ID'] == house_idx].reset_index(drop=True)
            farm_weight['CREATE_TIME'] = pd.to_datetime(farm_weight['CREATE_TIME'], format='%Y-%m-%d %H:%M:%S')

            house_cycle = house_breed_hist_total[house_breed_hist_total['HOUSE_ID'] == house_idx].reset_index(drop=True)
            date = farm_pixel['CREATE_TIME'].iloc[0]

            # Cycle number = 1-based position of the matching breeding record.
            if no_out_date:
                # FIX: the open (current) cycle is the row whose OUT_DATE is
                # null.  The original indexed with a full-frame isnull() mask,
                # which always resolved to row 0 regardless of the data.
                cycle = house_cycle[house_cycle['OUT_DATE'].isnull()].index[0] + 1
            else:
                cycle = house_cycle[(date >= house_cycle['IN_DATE']) & (date <= house_cycle['OUT_DATE'])].index[0] + 1

            self.start_date = farm_pixel['CREATE_TIME'].iloc[0].strftime('%Y-%m-%d')
            self.end_date = farm_pixel['CREATE_TIME'].iloc[-1].strftime('%Y-%m-%d')

            # Flock stocking datetime used to derive fractional day-age.
            chicken_age_data = house_breed_hist[(date >= house_breed_hist['IN_DATE'])]
            chicken_age = chicken_age_data.iloc[0]['IN_DATE'].strftime('%Y-%m-%d %H:%M:%S')
            chicken_age = datetime.strptime(chicken_age, '%Y-%m-%d %H:%M:%S')

            raw_result = self.change_raw_pixel(farm_pixel)
            outlier_remove_raw, medianPixel = self.outlier_remove(raw_result)

            # Training set: 6h median pixel joined to measured median weight.
            train_data = pd.merge(left=medianPixel, right=farm_weight, how='inner', on='CREATE_TIME')
            print("----------------Train data----------------")
            print(train_data)
            print("------------------------------------------")

            result = self.model_train(outlier_remove_raw, train_data)
            result['HOUSE_ID'] = house_idx
            result['cycle'] = cycle
            result = result.reset_index()

            # Day-age = fractional days since stocking + days-after-birth offset.
            # NOTE(review): DAYS_AFTER_BIRTH here is a pandas Series (as in the
            # original); it presumably holds a single row per house — confirm,
            # otherwise `.iloc[0]` is probably intended.
            days_after_birth = house_breed_hist[house_breed_hist['HOUSE_ID'] == house_idx]['DAYS_AFTER_BIRTH']
            result.loc[result['HOUSE_ID'] == house_idx, 'dayAge'] = [
                round((datetime.strptime(x.strftime('%Y-%m-%d %H:%M:%S'), '%Y-%m-%d %H:%M:%S') - chicken_age).days
                      + ((datetime.strptime(x.strftime('%Y-%m-%d %H:%M:%S'), '%Y-%m-%d %H:%M:%S') - chicken_age).seconds / 3600) / 24,
                      2) + days_after_birth
                for x in result[result['HOUSE_ID'] == house_idx]['CREATE_TIME']]

            print(result[['CREATE_TIME', 'HOUSE_ID', 'linear predict', 'dayAge', 'cycle']])
            # self.upload_db(result)
            print("time :", time.time() - start_time)
            print("------------------------------------------------------")

        self.close_db()
        print("finish :", time.time() - total_start_time)

    def upload_db(self, result):
        '''
        Upsert the weight-trend rows into tbl_weight_trend; failures are
        logged per row and do not stop the loop.

        :param result: frame with CREATE_TIME, HOUSE_ID, 'linear predict',
                       dayAge and cycle columns (default RangeIndex)
        '''
        for i in range(len(result)):
            try:
                row = result.loc[i]
                # NOTE(review): values are interpolated into the SQL string;
                # they originate from our own model/DB, but a parameterized
                # insert would be safer if insert_to_db supports it.
                insert_str = (
                    f"insert into tbl_weight_trend (CREATE_TIME, HOUSE_ID, WEIGHT, DAY_AGE, CYCLE) "
                    f"values('{row['CREATE_TIME']}', '{row['HOUSE_ID']}', {row['linear predict']}, "
                    f"{row['dayAge']}, {row['cycle']}) "
                    f"ON DUPLICATE KEY UPDATE CREATE_TIME='{row['CREATE_TIME']}', "
                    f"HOUSE_ID = '{row['HOUSE_ID']}', WEIGHT = {row['linear predict']}, "
                    f"DAY_AGE={row['dayAge']}, CYCLE={row['cycle']}")
                self._dbConn.insert_to_db(insert_str)
                print("DB upload Success")
            except Exception as e:
                print(e)

    def close_db(self):
        # Release the DB connection held since __init__.
        self._dbConn.close()


if __name__ == '__main__':
    '''
    Driver: run one prediction pass immediately.  The commented-out
    scheduler would instead refresh the trend every six hours.
    '''
    def job():
        print("Update Predict Weight every 6 hours")
        WeightPredict()

    # sched = BackgroundScheduler()
    # sched.add_job(job, 'cron', hour='00, 06, 12, 18', id="httpFileTransferToday_1")
    # sched.add_job(job, 'cron', minute='*', second='*/10', id="httpFileTransferToday_1")
    # sched.start()
    job()
    # Keep the process alive (needed when the background scheduler is enabled).
    while True:
        time.sleep(5)