Tryag File Manager
Home
||
Turbo Force
||
B-F Config_Cpanel
Current Path :
/
paip
/
script
/
util
/
Or
Select Your Path :
Upload File :
New :
File
Dir
# paip/script/util/weightUpdator.py
"""Estimate chicken weights from load-cell streams and persist them.

Pipeline (run as a script):
  1. Pull raw 'chickenweight' sensor rows from tbl_sensor_collect
     (last 6 hours by default, full history with argv[1] == 'all').
  2. Bucket samples per house into 2-hour windows.
  3. DBSCAN-cluster each bucket's samples and keep the heaviest cluster mean.
  4. Normalize by the day-age reference weight, estimate bird count,
     smooth with a rolling mean, and insert the latest two buckets per
     house into tbl_weight_stats.
"""
import os
import sys
from datetime import date, timedelta

import numpy as np
import pandas as pd
import seaborn as sns  # kept: may be used by plotting code elsewhere
from sklearn.cluster import DBSCAN

HOME_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
DATA_DIR = 'data'
MODULE_DIR = 'util'
OUTPUT_DIR = 'out'
insertDB = 1  # > 0 => write results to the DB; 0 => dry run

sys.path.append(os.path.join(HOME_PATH, MODULE_DIR))
from PyDBconnector import PyDBconnector
from logs import paiplog

# Feature column names fed to DBSCAN. Previously these were globals set only
# under __main__, which made apply_fn1() fail with NameError when the module
# was imported; they are now proper defaults.
X_COL, Y_COL = 'index', 'sensor_data'


@paiplog
def apply_fn1(test_df, isPlot=False, x_col=X_COL, y_col=Y_COL):
    """Cluster each bucket's weight samples and keep the heaviest cluster.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Must carry 'create_time', 'house_id' and 'sensor_data' (a list of
        numeric strings per row) columns.
    isPlot : bool
        Unused; kept for backward compatibility with existing callers.
    x_col, y_col : str
        Column names used as DBSCAN features (defaults match the original
        script-level xVal/yVal globals).

    Returns
    -------
    pandas.DataFrame
        Columns ['create_time', 'house_id', 'weight_clustered']; one row per
        input bucket that produced at least one non-noise cluster.
    """
    dicts_results = pd.DataFrame(columns=['create_time', 'house_id', 'weight_clustered'])
    for idx, ro in enumerate(test_df.iterrows()):
        # Drop the first/last raw samples, then keep plausible weights only
        # (strictly between 30 and 2000 — presumably grams; TODO confirm units).
        dataX = ro[1]['sensor_data'][1:-1]
        dataX = [float(x) for x in dataX
                 if (x != '') and (float(x) > 30.) and (float(x) < 2000.)]
        if not dataX:
            # DBSCAN raises on an empty feature matrix; skip empty buckets.
            continue
        dataX = pd.DataFrame(dataX, columns=['sensor_data']).reset_index()
        # Rescale the sample index so eps=2.5 spans a sensible neighborhood.
        dataX['index'] = dataX['index'].rank(method='first') / 60
        model = DBSCAN(eps=2.5, min_samples=30, n_jobs=-1)
        dataX['predict'] = model.fit_predict(dataX[[x_col, y_col]])
        # Mean weight per cluster, ignoring DBSCAN noise (label == -1).
        dicts = dataX[dataX.predict != -1].groupby(['predict'])[y_col].mean().to_dict()
        if len(dicts) == 0:
            continue
        # The heaviest cluster is taken as the bucket's weight estimate.
        dicts_results.loc[len(dicts_results)] = [
            ro[1].create_time, ro[1].house_id, max(list(dicts.values()))]
    return dicts_results


@paiplog
def getRefTable(dbconn):
    """Return the day-age -> reference-weight lookup table.

    Raises
    ------
    Exception
        If the SELECT fails; the DB error is chained as the cause.
    """
    sql_refTable = "select * from tbl_weight_ref"
    try:
        refTable = dbconn.select_from_db(sql_refTable)
    except Exception as err:
        raise Exception("Error to read refTable") from err
    return refTable


@paiplog
def getIndate(dbconn):
    """Return the latest flock in-date per house from tbl_house_breed_hist.

    Raises
    ------
    Exception
        If the SELECT fails; the DB error is chained as the cause.
    """
    # NOTE(review): 'order by create_time desc' references a column that is
    # not in the GROUP BY — some engines reject this; verify against the DB.
    sql_dayAge = f'''select house_id, max(in_date) as in_date from tbl_house_breed_hist where 1=1 group by house_id order by create_time desc '''
    try:
        rows = dbconn.select_from_db(sql_dayAge)
    except Exception as err:
        raise Exception("Error to read in_date from tbl house breed hist") from err
    return rows


@paiplog
def display_maxrow(df):
    """Print the whole DataFrame, temporarily lifting pandas' row limit."""
    pd.set_option('display.max_rows', len(df))
    try:
        print(df)
    finally:
        # Always restore the global option, even if printing fails.
        pd.reset_option('display.max_rows')


def getMean(x):
    """Mean of the numeric samples in *x*, skipping the first and last entries.

    *x* is a list of strings (split from the raw sensor payload); the last
    element is dropped before conversion, the first after.
    """
    x_ = [float(y) for y in x[:-1]]
    return np.mean(x_[1:])


@paiplog
def saveDB(dbconn, results_df):
    """Insert one row per result into tbl_weight_stats.

    Failed inserts are counted and logged rather than aborting the batch.

    Returns
    -------
    tuple(int, int)
        (number of failed inserts, total rows attempted).
    """
    errorCNT = 0
    for pdRow in results_df.iterrows():
        # NOTE(review): values are interpolated directly into the SQL string.
        # They are internally generated numerics here, but switch to
        # parameterized queries if PyDBconnector supports them.
        insert_string = f"insert into tbl_weight_stats(create_time, house_id, medianWeight, rollingWeight) values('{pdRow[1]['create_time']}','{pdRow[1]['house_id']}','{pdRow[1]['weighting']}','{pdRow[1]['rollweight']}')"
        try:
            dbconn.insert_to_db(insert_string)
        except Exception:
            errorCNT += 1
            print(f'insert error : {insert_string} ')
    return (errorCNT, len(results_df))


if __name__ == '__main__':
    dbconn = PyDBconnector()
    # dbconn = PyDBconnector(host='192.100.0.11')
    cluster_hour = 2  # aggregation window size, in hours, per house

    in_date_df = getIndate(dbconn)

    # 'all' reprocesses everything up to today; default is the last 6 hours,
    # snapped down to the start of the hour.
    if len(sys.argv) > 1 and sys.argv[1] == 'all':
        sqlStr = f'''select * from tbl_sensor_collect where sensor_type = 'chickenweight' and create_time < '{date.today() - timedelta(0)}' '''
    else:
        sqlStr = f'''select * from tbl_sensor_collect where sensor_type = 'chickenweight' and create_time >='{(pd.Timestamp.now() - pd.Timedelta(6, unit='hour')).strftime("%Y-%m-%d %H:00:00")}' '''

    weight_df = dbconn.select_from_db(sqlStr)[['CREATE_TIME', 'HOUSE_ID', 'MODULE_ID', 'SENSOR_DATA']]
    weight_df.columns = [str(x).lower() for x in weight_df.columns]

    # Normalize column dtypes; sensor_data arrives as one comma-joined string.
    weight_df['create_time'] = pd.to_datetime(weight_df.create_time, format='%Y-%m-%d %H:%M:%S')
    weight_df['house_id'] = weight_df.house_id.astype(str)
    weight_df['module_id'] = weight_df.module_id.astype(str)
    weight_df['sensor_data'] = weight_df.sensor_data.apply(lambda x: str(x).split(','))
    weight_df['meanWeight'] = weight_df.sensor_data.apply(getMean)

    # Concatenate all sample lists per house into cluster_hour-wide buckets
    # (list + list under .sum()); rows whose mean weight <= 10 are discarded.
    test_df = weight_df[(weight_df.meanWeight > 10)] \
        .sort_values(by=['house_id', 'create_time'], ascending=[True, True]) \
        .groupby(['house_id', pd.Grouper(key='create_time', freq=f'{cluster_hour}h')])['sensor_data'] \
        .sum().reset_index()

    dicts_results = apply_fn1(test_df)

    # Day-age of the flock at each bucket time, from the house's latest in_date.
    # NOTE(review): .item() raises if a house has no in_date row — verify
    # upstream data guarantees every house appears in tbl_house_breed_hist.
    dicts_results['dayAge'] = [
        (x[0] - pd.Timestamp(str(in_date_df[in_date_df.house_id == x[1]]['in_date'].item()))).days
        for x in list(zip(dicts_results.create_time, dicts_results.house_id))
    ]

    ref_table = getRefTable(dbconn)

    def roundUp(num):
        # Estimate birds on the scale: round up once the fraction reaches 0.3.
        return int(num) + 1 if (num - int(num)) >= 0.3 else int(num)

    dicts_results['refWeight'] = [ref_table.loc[x, 'ref_weight'] for x in dicts_results.dayAge]
    dicts_results['isUpper'] = np.round(dicts_results.weight_clustered / dicts_results.refWeight, 2)
    dicts_results['chickCount'] = dicts_results.isUpper.apply(roundUp)
    dicts_results.loc[dicts_results.chickCount == 0, 'chickCount'] = 1
    dicts_results['weighting'] = np.round(dicts_results.weight_clustered / dicts_results.chickCount, 1)

    # Drop implausible readings (< 50% of the reference weight), then smooth
    # with a per-house rolling mean scaled by a 1.025 calibration factor.
    dicts_results = dicts_results[dicts_results.isUpper >= 0.5]
    dicts_results['rollweight'] = np.round(np.multiply(
        dicts_results.groupby('house_id')['weighting'].rolling(3, min_periods=1).mean().to_list(),
        1.025), 1)
    dicts_results['gprank'] = dicts_results.groupby(['house_id'])['create_time'].transform('rank', ascending=False)

    # Keep only the latest two buckets per house_id.
    results_df = dicts_results[dicts_results.gprank <= 2].copy()
    # display_maxrow(results_df)

    # DB insert. result_error is pre-initialized so the final check does not
    # raise NameError when insertDB == 0 (bug in the original).
    result_error, result_total = 0, len(results_df)
    if insertDB > 0:
        result_error, result_total = saveDB(dbconn, results_df)
    dbconn.close()
    if result_error > 0:
        raise Exception(f"insert DB {result_error}/{len(results_df)} error occurred")