# Tryag File Manager
#
# //paip/script/weight/weightUpdator(New).py

import numpy
import pandas as pd
import numpy as np
import os, sys
import seaborn as sns
from sklearn.cluster import DBSCAN
from datetime import date, timedelta
import matplotlib.pyplot as plt

# Parent directory of the folder that contains this script.
HOME_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
# Sub-directory names; only MODULE_DIR is used by the code visible in this file.
DATA_DIR = 'data'
MODULE_DIR = 'util'
OUTPUT_DIR = 'out'
# NOTE(review): isUp is not referenced by the code visible here - confirm it is still needed.
isUp = 50.
# Only referenced by the commented-out DB insert section of the main script.
insertDB = 1
# Add <HOME_PATH>/util to the import path; this has to run before the import
# below (presumably PyDBconnector lives in that directory - confirm).
sys.path.append(os.path.join(HOME_PATH, MODULE_DIR))
from PyDBconnector import PyDBconnector

def apply_fn1(x, isPlot=True):
    """Summarise one group's raw weight readings as DBSCAN cluster statistics.

    Only the first row of the group is used: its ``SENSOR_DATA`` sequence is
    clustered in one dimension (``eps=15``, ``min_samples=5``) and every
    non-noise cluster contributes one output row.

    Args:
        x: Group data accepted by ``pd.DataFrame``. It must provide the
            columns ``HOUSE_ID``, ``MODULE_ID``, ``CREATE_TIME`` and
            ``SENSOR_DATA`` (a sequence of numeric readings per row).
        isPlot: When true, show a scatter plot of the readings with noise
            points in red and clustered points in blue.

    Returns:
        pd.DataFrame: columns ``house_id``, ``module_id``, ``create_time``
        (taken from the first row), ``sensor_data`` (mean of each cluster)
        and ``seqAsX`` (number of readings in each cluster). The frame has
        zero rows when there are no readings or every reading is noise.
    """
    df = pd.DataFrame(x)
    df_weight = pd.DataFrame({'SENSOR_DATA': df['SENSOR_DATA'].iloc[0]})

    cluster_means = {}
    cluster_sizes = {}
    # DBSCAN rejects an empty sample set, so clustering (and plotting) is
    # skipped when the row carries no readings at all.
    if not df_weight.empty:
        model = DBSCAN(eps=15., min_samples=5, n_jobs=-1)
        df_weight['predict'] = model.fit_predict(df_weight[['SENSOR_DATA']])

        # Label -1 marks the points DBSCAN classified as noise.
        is_noise = df_weight['predict'] == -1
        clustered = df_weight[~is_noise].groupby(['predict'])['SENSOR_DATA']
        cluster_means = clustered.mean().to_dict()
        cluster_sizes = clustered.count().to_dict()

        if isPlot:
            noise_rows = df_weight[is_noise]
            normal_rows = df_weight[~is_noise]
            plt.figure(figsize=(20, 10))
            plt.title(' Raw Weight Outlier ')
            plt.scatter(x=noise_rows.index, y=noise_rows['SENSOR_DATA'], color='red', label='outlier')
            plt.scatter(x=normal_rows.index, y=normal_rows['SENSOR_DATA'], color='blue', label='normal')
            plt.legend(loc='best', ncol=3)
            plt.show()

    # The scalar identifiers are broadcast to one row per cluster; with no
    # clusters this yields an empty frame that still has all five columns.
    return pd.DataFrame({
        'house_id': df['HOUSE_ID'].iloc[0],
        'module_id': df['MODULE_ID'].iloc[0],
        'create_time': df['CREATE_TIME'].iloc[0],
        'sensor_data': list(cluster_means.values()),
        'seqAsX': list(cluster_sizes.values()),
    })

def str_change_remove(x):
    """Convert a comma-separated sensor string into a list of floats.

    The sensor values arrive as text, so they are converted to numbers here.
    Empty or whitespace-only tokens are skipped, and the first parsed value is
    dropped (presumably a non-measurement leading field - confirm against the
    data format).

    Args:
        x: Comma-separated string of numeric tokens.

    Returns:
        list[float]: the parsed values without the first one.

    Raises:
        ValueError: if a non-blank token is not a valid float.
    """
    values = [float(token) for token in x.split(',') if token.strip()]
    return values[1:]


def build_weight_query(end_date, start_date=None):
    """Build the SELECT statement that fetches weight-module rows.

    Args:
        end_date: Exclusive upper bound for ``create_time``.
        start_date: Inclusive lower bound for ``create_time``; ``None``
            means no lower bound (fetch everything before ``end_date``).

    Returns:
        str: SQL selecting all columns of ``tbl_file_collect`` for modules
        whose id starts with ``WE``, ordered by ``create_time``.
    """
    conditions = ["MODULE_ID like 'WE%'"]
    if start_date is not None:
        conditions.append(f"create_time >= '{start_date}'")
    conditions.append(f"create_time < '{end_date}'")
    where_clause = '\n  and '.join(conditions)
    return (
        'select *\n'
        'from tbl_file_collect\n'
        f'where {where_clause}\n'
        'ORDER BY create_time'
    )


def main():
    """Fetch weight sensor rows, cluster them and print 6-hour medians.

    With the command-line argument ``all`` every row before today is
    processed; otherwise only rows from yesterday up to (not including) today.
    """
    today = date.today()
    yesterday = today - timedelta(1)
    fetch_all = len(sys.argv) > 1 and sys.argv[1] == 'all'
    sqlStr = build_weight_query(today, None if fetch_all else yesterday)

    dbconn = PyDBconnector('192.100.0.11')
    try:
        wanted_columns = ['CREATE_TIME', 'HOUSE_ID', 'MODULE_ID', 'FILE_INFO']
        # copy() so the column assignments below act on an independent frame.
        weight_df = dbconn.select_from_db(sqlStr)[wanted_columns].copy()
        if weight_df.empty:
            print('no weight sensor rows found for the requested period')
            return

        weight_df['CREATE_TIME'] = pd.to_datetime(weight_df['CREATE_TIME'], format='%Y-%m-%d %H:%M:%S')
        weight_df['HOUSE_ID'] = weight_df['HOUSE_ID'].astype(str)
        weight_df['MODULE_ID'] = weight_df['MODULE_ID'].astype(str)
        weight_df['SENSOR_DATA'] = weight_df['FILE_INFO'].apply(str_change_remove)

        # 1-based position of each row among the rows sharing its CREATE_TIME,
        # scaled by 1/30.
        weight_df['seqAsX'] = (
            weight_df.reset_index()
            .groupby('CREATE_TIME')['index']
            .transform(lambda s: s.rank(method='first')) / 30
        )

        # NOTE(review): apply_fn1 runs with its default isPlot here; if that
        # default is True a plot is shown for every group.
        weight_gp = weight_df.groupby(['HOUSE_ID', 'MODULE_ID', 'CREATE_TIME'])[
            ['HOUSE_ID', 'MODULE_ID', 'CREATE_TIME', 'SENSOR_DATA', 'seqAsX']
        ].apply(apply_fn1)
        if weight_gp.empty:
            print('no weight clusters found for the requested period')
            return

        house_id_list = sorted(set(weight_gp['house_id']))
        module_id_list = sorted(set(weight_gp['module_id']))

        module_frames = []
        for house_idx in house_id_list:
            print(f"-------------------------------{house_idx} -------------------------------")
            weight_house = weight_gp[weight_gp['house_id'] == house_idx]

            for module_idx in module_id_list:
                module_rows = weight_house[weight_house['module_id'] == module_idx]
                if module_rows.empty:
                    continue
                # numeric_only=True keeps the string id columns out of the
                # median; they are re-attached right after.
                module_stats = (
                    module_rows
                    .groupby(pd.Grouper(key='create_time', freq='6h'))
                    .median(numeric_only=True)
                    .reset_index()
                )
                module_stats['house_id'] = house_idx
                module_stats['module_id'] = module_idx

                print(module_stats)
                module_frames.append(module_stats)

                # DB insert (currently disabled)
                # if insertDB > 0 :
                #     try :
                #         print('=========================== insert results ===========================')
                #         for pdRow in module_stats.iterrows() :
                #             # update when a (CREATE_TIME, HOUSE_ID, MODULE_ID) row already exists, otherwise insert
                #             insert_string = f"insert into tbl_weight_stats(CREATE_TIME, HOUSE_ID, MODULE_ID, medianWeight) values('{pdRow[1]['create_time']}','{pdRow[1]['house_id']}','{pdRow[1]['module_id']}',{round(pdRow[1]['sensor_data'],1)}) ON DUPLICATE KEY UPDATE CREATE_TIME='{pdRow[1]['create_time']}', HOUSE_ID = '{pdRow[1]['house_id']}', MODULE_ID = '{pdRow[1]['module_id']}', medianWeight={round(pdRow[1]['sensor_data'],1)}"
                #             dbconn.insert_to_db(insert_string)
                #     except :
                #         raise Exception('insert query error! check DB')

        # Concatenate once at the end instead of growing a frame per iteration.
        weight_df_total = pd.concat(module_frames, sort=False) if module_frames else pd.DataFrame()

        isPlot = False
        if isPlot and not weight_df_total.empty:
            plt.figure(figsize=(20, 10))
            plt.title(' Weight Sensor Data')
            house_colors = (('H01', 'red'), ('H02', 'green'), ('H03', 'blue'), ('H04', 'orange'))
            for house_id, color in house_colors:
                house_rows = weight_df_total[weight_df_total['house_id'] == house_id]
                # Label each series with its house id so the legend entries differ.
                plt.scatter(x=house_rows['create_time'], y=house_rows['sensor_data'], color=color, label=house_id)
            plt.legend(loc='best', ncol=4)
            plt.show()
    finally:
        # Release the connection even when processing fails part-way.
        dbconn.close()


if __name__ == '__main__':
    main()