Tryag File Manager
Home
||
Turbo Force
||
B-F Config_Cpanel
Current Path :
/
paip
/
script
/
util
/
Or
Select Your Path :
Upload File :
New :
File
Dir
# paip/script/util/weightUpdator.py
"""Estimate chicken weights from load-cell streams and persist them.

Pipeline (run as a script):
  1. Pull raw 'chickenweight' sensor rows from tbl_sensor_collect
     (last 6 hours by default, full history with argv[1] == 'all').
  2. Bucket samples per house into 2-hour windows.
  3. DBSCAN-cluster each bucket's samples and keep the heaviest cluster mean.
  4. Normalize by the day-age reference weight, estimate bird count,
     smooth with a rolling mean, and insert the latest two buckets per
     house into tbl_weight_stats.
"""
import os
import sys
from datetime import date, timedelta

import numpy as np
import pandas as pd
import seaborn as sns  # kept: may be used by plotting code elsewhere
from sklearn.cluster import DBSCAN

HOME_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
DATA_DIR = 'data'
MODULE_DIR = 'util'
OUTPUT_DIR = 'out'
insertDB = 1  # > 0 => write results to the DB; 0 => dry run

sys.path.append(os.path.join(HOME_PATH, MODULE_DIR))
from PyDBconnector import PyDBconnector
from logs import paiplog

# Feature column names fed to DBSCAN. Previously these were globals set only
# under __main__, which made apply_fn1() fail with NameError when the module
# was imported; they are now proper defaults.
X_COL, Y_COL = 'index', 'sensor_data'


@paiplog
def apply_fn1(test_df, isPlot=False, x_col=X_COL, y_col=Y_COL):
    """Cluster each bucket's weight samples and keep the heaviest cluster.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Must carry 'create_time', 'house_id' and 'sensor_data' (a list of
        numeric strings per row) columns.
    isPlot : bool
        Unused; kept for backward compatibility with existing callers.
    x_col, y_col : str
        Column names used as DBSCAN features (defaults match the original
        script-level xVal/yVal globals).

    Returns
    -------
    pandas.DataFrame
        Columns ['create_time', 'house_id', 'weight_clustered']; one row per
        input bucket that produced at least one non-noise cluster.
    """
    dicts_results = pd.DataFrame(columns=['create_time', 'house_id', 'weight_clustered'])
    for idx, ro in enumerate(test_df.iterrows()):
        # Drop the first/last raw samples, then keep plausible weights only
        # (strictly between 30 and 2000 — presumably grams; TODO confirm units).
        dataX = ro[1]['sensor_data'][1:-1]
        dataX = [float(x) for x in dataX
                 if (x != '') and (float(x) > 30.) and (float(x) < 2000.)]
        if not dataX:
            # DBSCAN raises on an empty feature matrix; skip empty buckets.
            continue
        dataX = pd.DataFrame(dataX, columns=['sensor_data']).reset_index()
        # Rescale the sample index so eps=2.5 spans a sensible neighborhood.
        dataX['index'] = dataX['index'].rank(method='first') / 60
        model = DBSCAN(eps=2.5, min_samples=30, n_jobs=-1)
        dataX['predict'] = model.fit_predict(dataX[[x_col, y_col]])
        # Mean weight per cluster, ignoring DBSCAN noise (label == -1).
        dicts = dataX[dataX.predict != -1].groupby(['predict'])[y_col].mean().to_dict()
        if len(dicts) == 0:
            continue
        # The heaviest cluster is taken as the bucket's weight estimate.
        dicts_results.loc[len(dicts_results)] = [
            ro[1].create_time, ro[1].house_id, max(list(dicts.values()))]
    return dicts_results


@paiplog
def getRefTable(dbconn):
    """Return the day-age -> reference-weight lookup table.

    Raises
    ------
    Exception
        If the SELECT fails; the DB error is chained as the cause.
    """
    sql_refTable = "select * from tbl_weight_ref"
    try:
        refTable = dbconn.select_from_db(sql_refTable)
    except Exception as err:
        raise Exception("Error to read refTable") from err
    return refTable


@paiplog
def getIndate(dbconn):
    """Return the latest flock in-date per house from tbl_house_breed_hist.

    Raises
    ------
    Exception
        If the SELECT fails; the DB error is chained as the cause.
    """
    # NOTE(review): 'order by create_time desc' references a column that is
    # not in the GROUP BY — some engines reject this; verify against the DB.
    sql_dayAge = f'''select house_id, max(in_date) as in_date from tbl_house_breed_hist where 1=1 group by house_id order by create_time desc '''
    try:
        rows = dbconn.select_from_db(sql_dayAge)
    except Exception as err:
        raise Exception("Error to read in_date from tbl house breed hist") from err
    return rows


@paiplog
def display_maxrow(df):
    """Print the whole DataFrame, temporarily lifting pandas' row limit."""
    pd.set_option('display.max_rows', len(df))
    try:
        print(df)
    finally:
        # Always restore the global option, even if printing fails.
        pd.reset_option('display.max_rows')


def getMean(x):
    """Mean of the numeric samples in *x*, skipping the first and last entries.

    *x* is a list of strings (split from the raw sensor payload); the last
    element is dropped before conversion, the first after.
    """
    x_ = [float(y) for y in x[:-1]]
    return np.mean(x_[1:])


@paiplog
def saveDB(dbconn, results_df):
    """Insert one row per result into tbl_weight_stats.

    Failed inserts are counted and logged rather than aborting the batch.

    Returns
    -------
    tuple(int, int)
        (number of failed inserts, total rows attempted).
    """
    errorCNT = 0
    for pdRow in results_df.iterrows():
        # NOTE(review): values are interpolated directly into the SQL string.
        # They are internally generated numerics here, but switch to
        # parameterized queries if PyDBconnector supports them.
        insert_string = f"insert into tbl_weight_stats(create_time, house_id, medianWeight, rollingWeight) values('{pdRow[1]['create_time']}','{pdRow[1]['house_id']}','{pdRow[1]['weighting']}','{pdRow[1]['rollweight']}')"
        try:
            dbconn.insert_to_db(insert_string)
        except Exception:
            errorCNT += 1
            print(f'insert error : {insert_string} ')
    return (errorCNT, len(results_df))


if __name__ == '__main__':
    dbconn = PyDBconnector()
    # dbconn = PyDBconnector(host='192.100.0.11')
    cluster_hour = 2  # aggregation window size, in hours, per house

    in_date_df = getIndate(dbconn)

    # 'all' reprocesses everything up to today; default is the last 6 hours,
    # snapped down to the start of the hour.
    if len(sys.argv) > 1 and sys.argv[1] == 'all':
        sqlStr = f'''select * from tbl_sensor_collect where sensor_type = 'chickenweight' and create_time < '{date.today() - timedelta(0)}' '''
    else:
        sqlStr = f'''select * from tbl_sensor_collect where sensor_type = 'chickenweight' and create_time >='{(pd.Timestamp.now() - pd.Timedelta(6, unit='hour')).strftime("%Y-%m-%d %H:00:00")}' '''

    weight_df = dbconn.select_from_db(sqlStr)[['CREATE_TIME', 'HOUSE_ID', 'MODULE_ID', 'SENSOR_DATA']]
    weight_df.columns = [str(x).lower() for x in weight_df.columns]

    # Normalize column dtypes; sensor_data arrives as one comma-joined string.
    weight_df['create_time'] = pd.to_datetime(weight_df.create_time, format='%Y-%m-%d %H:%M:%S')
    weight_df['house_id'] = weight_df.house_id.astype(str)
    weight_df['module_id'] = weight_df.module_id.astype(str)
    weight_df['sensor_data'] = weight_df.sensor_data.apply(lambda x: str(x).split(','))
    weight_df['meanWeight'] = weight_df.sensor_data.apply(getMean)

    # Concatenate all sample lists per house into cluster_hour-wide buckets
    # (list + list under .sum()); rows whose mean weight <= 10 are discarded.
    test_df = weight_df[(weight_df.meanWeight > 10)] \
        .sort_values(by=['house_id', 'create_time'], ascending=[True, True]) \
        .groupby(['house_id', pd.Grouper(key='create_time', freq=f'{cluster_hour}h')])['sensor_data'] \
        .sum().reset_index()

    dicts_results = apply_fn1(test_df)

    # Day-age of the flock at each bucket time, from the house's latest in_date.
    # NOTE(review): .item() raises if a house has no in_date row — verify
    # upstream data guarantees every house appears in tbl_house_breed_hist.
    dicts_results['dayAge'] = [
        (x[0] - pd.Timestamp(str(in_date_df[in_date_df.house_id == x[1]]['in_date'].item()))).days
        for x in list(zip(dicts_results.create_time, dicts_results.house_id))
    ]

    ref_table = getRefTable(dbconn)

    def roundUp(num):
        # Estimate birds on the scale: round up once the fraction reaches 0.3.
        return int(num) + 1 if (num - int(num)) >= 0.3 else int(num)

    dicts_results['refWeight'] = [ref_table.loc[x, 'ref_weight'] for x in dicts_results.dayAge]
    dicts_results['isUpper'] = np.round(dicts_results.weight_clustered / dicts_results.refWeight, 2)
    dicts_results['chickCount'] = dicts_results.isUpper.apply(roundUp)
    dicts_results.loc[dicts_results.chickCount == 0, 'chickCount'] = 1
    dicts_results['weighting'] = np.round(dicts_results.weight_clustered / dicts_results.chickCount, 1)

    # Drop implausible readings (< 50% of the reference weight), then smooth
    # with a per-house rolling mean scaled by a 1.025 calibration factor.
    dicts_results = dicts_results[dicts_results.isUpper >= 0.5]
    dicts_results['rollweight'] = np.round(np.multiply(
        dicts_results.groupby('house_id')['weighting'].rolling(3, min_periods=1).mean().to_list(),
        1.025), 1)
    dicts_results['gprank'] = dicts_results.groupby(['house_id'])['create_time'].transform('rank', ascending=False)

    # Keep only the latest two buckets per house_id.
    results_df = dicts_results[dicts_results.gprank <= 2].copy()
    # display_maxrow(results_df)

    # DB insert. result_error is pre-initialized so the final check does not
    # raise NameError when insertDB == 0 (bug in the original).
    result_error, result_total = 0, len(results_df)
    if insertDB > 0:
        result_error, result_total = saveDB(dbconn, results_df)
    dbconn.close()
    if result_error > 0:
        raise Exception(f"insert DB {result_error}/{len(results_df)} error occurred")