Tryag File Manager
Home
||
Turbo Force
||
B-F Config_Cpanel
Current Path :
/
paip
/
script
/
weight
/
Or
Select Your Path :
Upload File :
New :
File
Dir
//paip/script/weight/weightUpdator(New).py
"""Aggregate raw weight-sensor readings into 6-hour median weights.

Rows for weight modules (MODULE_ID like 'WE%') are read from
``tbl_file_collect``. Each row's comma-separated FILE_INFO payload is parsed
into a list of floats, DBSCAN is used to discard outlier readings, and the
per-cluster means are then reduced to a median per 6-hour window for every
(house, module) pair.

Usage:
    python <this script>        # rows from yesterday up to (not including) today
    python <this script> all    # every row created before today
"""
import numpy
import pandas as pd
import numpy as np
import os
import sys
import seaborn as sns
from sklearn.cluster import DBSCAN
from datetime import date, timedelta
import matplotlib.pyplot as plt

HOME_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
DATA_DIR = 'data'
MODULE_DIR = 'util'
OUTPUT_DIR = 'out'
isUp = 50.
insertDB = 1

# PyDBconnector lives in <HOME_PATH>/util, so that directory has to be on
# sys.path before the import below.
sys.path.append(os.path.join(HOME_PATH, MODULE_DIR))
from PyDBconnector import PyDBconnector


def apply_fn1(x, isPlot=True):
    """Cluster one group's raw readings with DBSCAN and summarise each cluster.

    Only the first row of the group is used: its ``SENSOR_DATA`` cell (a list
    of floats) supplies the readings, and its ``HOUSE_ID`` / ``MODULE_ID`` /
    ``CREATE_TIME`` values label the result.

    Args:
        x: Group frame with columns ``HOUSE_ID``, ``MODULE_ID``,
            ``CREATE_TIME`` and ``SENSOR_DATA``.
        isPlot: When true, show a scatter plot of outliers (red) versus
            clustered readings (blue). ``plt.show()`` is called, so this may
            block in an interactive backend.

    Returns:
        A DataFrame with one row per DBSCAN cluster and the columns
        ``house_id``, ``module_id``, ``create_time``, ``sensor_data`` (mean of
        the cluster) and ``seqAsX`` (number of readings in the cluster; the
        column name is kept for compatibility with the rest of the script).
        The frame is empty when there are no readings or every reading is
        classified as noise.
    """
    result_columns = ['house_id', 'module_id', 'create_time', 'sensor_data', 'seqAsX']
    df = pd.DataFrame(x)
    readings = df['SENSOR_DATA'].iloc[0]

    # DBSCAN cannot be fitted on zero samples; report "no clusters" instead
    # of raising from inside groupby.apply.
    if len(readings) == 0:
        return pd.DataFrame(columns=result_columns)

    df_weight = pd.DataFrame({'SENSOR_DATA': readings})

    # One-dimensional clustering on the reading value; label -1 marks noise.
    model = DBSCAN(eps=15., min_samples=5, n_jobs=-1)
    df_weight['predict'] = model.fit_predict(df_weight[['SENSOR_DATA']])

    is_outlier = df_weight['predict'] == -1
    cluster_stats = (
        df_weight[~is_outlier]
        .groupby('predict')['SENSOR_DATA']
        .agg(['mean', 'count'])
    )

    if isPlot:
        outliers = df_weight[is_outlier]
        inliers = df_weight[~is_outlier]
        plt.figure(figsize=(20, 10))
        plt.title(' Raw Weight Outlier ')
        plt.scatter(x=outliers.index, y=outliers['SENSOR_DATA'], color='red', label='outlier')
        plt.scatter(x=inliers.index, y=inliers['SENSOR_DATA'], color='blue', label='normal')
        plt.legend(loc='best', ncol=3)
        plt.show()

    return pd.DataFrame({
        'house_id': df['HOUSE_ID'].iloc[0],
        'module_id': df['MODULE_ID'].iloc[0],
        'create_time': df['CREATE_TIME'].iloc[0],
        'sensor_data': cluster_stats['mean'].tolist(),
        'seqAsX': cluster_stats['count'].tolist(),
    })


if __name__ == '__main__':
    today = date.today()
    yesterday = today - timedelta(1)

    dbconn = PyDBconnector('192.100.0.11')
    try:
        # The ORDER BY clause must sit outside the quoted date literal;
        # otherwise it becomes part of the string compared with create_time.
        if len(sys.argv) > 1 and sys.argv[1] == 'all':
            sqlStr = (
                "select * from tbl_file_collect "
                "where MODULE_ID like 'WE%' "
                f"and create_time < '{today}' "
                "ORDER BY create_time"
            )
        else:
            sqlStr = (
                "select * from tbl_file_collect "
                "where MODULE_ID like 'WE%' "
                f"and create_time >= '{yesterday}' "
                f"and create_time < '{today}' "
                "ORDER BY create_time"
            )

        weight_df = dbconn.select_from_db(sqlStr)[['CREATE_TIME', 'HOUSE_ID', 'MODULE_ID', 'FILE_INFO']]
        if weight_df.empty:
            # Nothing to aggregate; the steps below would fail on missing columns.
            raise SystemExit('No weight rows found for the requested period.')

        # weight
        weight_df['CREATE_TIME'] = pd.to_datetime(weight_df['CREATE_TIME'], format='%Y-%m-%d %H:%M:%S')
        weight_df['HOUSE_ID'] = weight_df['HOUSE_ID'].astype(str)
        weight_df['MODULE_ID'] = weight_df['MODULE_ID'].astype(str)

        def str_change_remove(x):
            """Parse a comma-separated payload into floats, dropping the first value.

            Sensor values arrive as one string, so they are converted to
            numbers here. Empty fields are ignored.
            NOTE(review): the first parsed value is discarded, presumably
            because it is a header/sequence field - confirm with the producer.
            """
            values = [float(token) for token in x.split(',') if token != '']
            return values[1:]

        weight_df['SENSOR_DATA'] = weight_df['FILE_INFO'].apply(str_change_remove)
        weight_df['seqAsX'] = (
            weight_df.reset_index()
            .groupby('CREATE_TIME')['index']
            .transform(lambda x: x.rank(method='first'))
            / 30
        )
        # NOTE(review): SENSOR_DATA holds lists, so 'max' compares whole lists;
        # the resulting column is not read anywhere below.
        weight_df['maxWeight'] = weight_df.groupby(
            ['CREATE_TIME', 'HOUSE_ID', 'MODULE_ID'])[['SENSOR_DATA']].transform('max')

        # NOTE(review): apply_fn1 is called with its default isPlot=True, so a
        # figure is shown for every group.
        weight_gp = weight_df.groupby(['HOUSE_ID', 'MODULE_ID', 'CREATE_TIME'])[
            ['HOUSE_ID', 'MODULE_ID', 'CREATE_TIME', 'SENSOR_DATA', 'seqAsX']
        ].apply(apply_fn1)

        house_id_list = sorted(set(weight_gp['house_id']))
        module_id_list = sorted(set(weight_gp['module_id']))

        window_frames = []
        for house_idx in house_id_list:
            print(f"-------------------------------{house_idx} -------------------------------")
            weight_house = weight_gp[weight_gp['house_id'] == house_idx]
            for module_idx in module_id_list:
                # numeric_only=True keeps the string id columns out of the
                # median, which newer pandas would otherwise reject.
                window_df = (
                    weight_house[weight_house['module_id'] == module_idx]
                    .groupby(pd.Grouper(key='create_time', freq='6h'))
                    .median(numeric_only=True)
                )
                if len(window_df) == 0:
                    continue
                window_df = window_df.reset_index()
                window_df['house_id'] = house_idx
                window_df['module_id'] = module_idx
                print(window_df)
                window_frames.append(window_df)

                # DB insert (disabled).
                # if insertDB > 0 :
                #     try :
                #         print('=========================== insert results ===========================')
                #         for pdRow in window_df.iterrows() :
                #             # Update when a row for (CREATE_TIME, HOUSE_ID, MODULE_ID)
                #             # already exists, otherwise insert.
                #             insert_string = f"insert into tbl_weight_stats(CREATE_TIME, HOUSE_ID, MODULE_ID, medianWeight) values('{pdRow[1]['create_time']}','{pdRow[1]['house_id']}','{pdRow[1]['module_id']}',{round(pdRow[1]['sensor_data'],1)}) ON DUPLICATE KEY UPDATE CREATE_TIME='{pdRow[1]['create_time']}', HOUSE_ID = '{pdRow[1]['house_id']}', MODULE_ID = '{pdRow[1]['module_id']}', medianWeight={round(pdRow[1]['sensor_data'],1)}"
                #             dbconn.insert_to_db(insert_string)
                #     except :
                #         raise Exception('insert query error! check DB')

        # Concatenate once instead of growing a frame inside the loop.
        if window_frames:
            weight_df_total = pd.concat(window_frames, sort=False)
        else:
            weight_df_total = pd.DataFrame()

        isPlot = False
        if isPlot:
            house_colors = {'H01': 'red', 'H02': 'green', 'H03': 'blue', 'H04': 'orange'}
            plt.figure(figsize=(20, 10))
            plt.title(' Weight Sensor Data')
            for house, color in house_colors.items():
                house_rows = weight_df_total[weight_df_total['house_id'] == house]
                plt.scatter(x=house_rows['create_time'], y=house_rows['sensor_data'],
                            color=color, label='weight')
            plt.legend(loc='best', ncol=4)
            plt.show()
    finally:
        # Release the connection even when the query or processing fails.
        dbconn.close()