import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys, os
import datetime
from matplotlib.pylab import rcParams
%matplotlib inline
import warnings
import seaborn as sns
#from matplotlib import pyplot as plt
#import seaborn as sns
# Input workbooks. Every sheet is read with "..." treated as a missing value.
ruta_2012_2018 = 'data/ResultadosCalidadDeAgua2012-2018.xlsx'
ruta_2019 = 'data/Resultados de Calidad del Agua 2019.xlsx'

# Parse the 2012-2018 workbook a single time: passing a list of sheet names
# makes read_excel return a dict of DataFrames keyed by sheet name, instead of
# re-opening and re-parsing the same file once per sheet.
hojas_2012_2018 = pd.read_excel(
    ruta_2012_2018,
    sheet_name=['Resultados-calidad de agua 2012', 'Etiquetas', 'Sitios'],
    na_values="...",
)
df_res_2018 = hojas_2012_2018['Resultados-calidad de agua 2012']  # results
df_eti = hojas_2012_2018['Etiquetas']                             # labels
df_sit = hojas_2012_2018['Sitios']                                # sites

df_res_2019 = pd.read_excel(ruta_2019, sheet_name='Resultados 2019', na_values="...")

# Stack both result tables into one frame with a fresh 0..n-1 index.
# sort=False keeps the column order of the first frame.
df_res_full = pd.concat([df_res_2018, df_res_2019], ignore_index=True, sort=False)
# Parameter keys to analyse. Each entry is used both as a column name in the
# result tables and as a value looked up in df_eti['CLAVE PARÁMETRO'].
cols = [
    'SAAM',
    'OD_mg/L',
    'COLI_TOT',
    'pH_CAMPO',
    'TEMP_AGUA',
    'NI_TOT',
    'E_COLI',
    'HG_TOT',
    'PB_TOT',
    'CD_TOT',
    'CR_TOT',
    'AS_TOT',
    'TURBIEDAD',
    'SST',
    'COLOR_VER',
    'DUR_TOT',
    'N_TOT',
    'COLI_FEC',
    'PO4_TOT',
    'N_NH3',
    'N_NO2',
    'N_NO3',
]
# For each of the first three METZTITLAN sites, collect the parameters that
# have at least 10 non-missing observations across 2012-2018 and 2019.
# All three dicts are keyed by the site's position (0, 1, 2) and hold
# parallel lists:
#   vartm10[i]          -> parameter titles that passed the threshold
#   df_nom_par_2018[i]  -> the matching 2012-2018 series (NaNs dropped)
#   df_nom_par_2019[i]  -> the matching 2019 series (NaNs dropped)
vartm10 = {}
df_nom_par_2018 = {}
df_nom_par_2019 = {}

# Coerce every parameter column to numeric once, in place (later code reads
# df_res_2018 / df_res_2019 and relies on this). errors='coerce' turns any
# non-numeric text into NaN, so no separate string clean-up is needed; the
# original replace()/dropna() calls discarded their results and were no-ops.
for col in cols:
    df_res_2018[col] = pd.to_numeric(df_res_2018[col], downcast="float", errors='coerce')
    df_res_2019[col] = pd.to_numeric(df_res_2019[col], downcast="float", errors='coerce')

# The site selection does not depend on the loop variables: compute it once.
df_Metztitlan = df_sit[df_sit["CUERPO DE AGUA"].str.contains("METZTITLAN", na=False)]
df_cuerpo_de_agua = df_Metztitlan
index_namecols = df_Metztitlan.iloc[:, :]

for i in range(3):  # position of the site among the METZTITLAN rows
    nombre_sitio = df_Metztitlan["NOMBRE DEL SITIO"].array[i]
    key_array_df = df_cuerpo_de_agua["CLAVE SITIO"].array[i]

    # Rows whose site key contains this site's key (str.contains is a
    # pattern/substring match, not an equality test).
    serie_2018 = df_res_2018[df_res_2018["CLAVE SITIO"].str.contains(key_array_df, na=False)]
    serie_2019 = df_res_2019[df_res_2019["CLAVE SITIO"].str.contains(key_array_df, na=False)]

    for col in cols:
        df_rows = df_eti[df_eti['CLAVE PARÁMETRO'] == col]
        # First matching label row, second column.
        # NOTE(review): presumably 'NOMBRE DEL PARÁMETRO' -- confirm the
        # column order of the 'Etiquetas' sheet.
        titulo = df_rows.iloc[0, 1]

        valores_2018 = serie_2018[col].dropna()
        valores_2019 = serie_2019[col].dropna()
        if valores_2018.shape[0] + valores_2019.shape[0] >= 10:
            vartm10.setdefault(i, []).append(titulo)
            df_nom_par_2018.setdefault(i, []).append(valores_2018)
            df_nom_par_2019.setdefault(i, []).append(valores_2019)
# Switch to the "ts" directory; the CSV files written below and the
# 'FI.csv' / '*.png' files handled afterwards are all resolved relative to it.
os.chdir("ts")
# Show the new working directory without shelling out (works on any platform
# and outside IPython).
print(os.getcwd())
# --- Run configuration (constant for every site/parameter) -----------------
#f_name=input('enter file name-')
#w_size=int(input('enter window size-'))
#w_incre=int(input('enter window increment-'))
#sm_step=int(input('enter step for block average for smoothing of the FI-'))
#X_tick=input('Provide step for xticks(Y)-')
w_size = 8
w_incre = 1
sm_step = 3
X_tick = 'Y'
xtick_step = 1 if X_tick.upper() == 'Y' else 'def'


def main(f_name, w_size, w_incre, xtick_step):
    """Run SOST, FI and FI_smooth, in that order, for one exported series.

    The three callables come from the local modules ``sost_g``, ``fisher_p``
    and ``smooth_h``; what they compute is not visible from this file.

    Args:
        f_name: Base name (no extension) of the CSV written by the caller.
        w_size: Window size, forwarded to all three steps.
        w_incre: Window increment, forwarded to ``FI``.
        xtick_step: X-tick setting, forwarded to ``FI_smooth``.

    Note:
        ``sm_step`` is read from module scope, as in the original code.
    """
    # sys.path entries must be directories. The original appended the file
    # names "sost_g.py", "fisher_p.py" and "smooth_h.py" on every call, which
    # the import system ignores and which only made sys.path grow. Register
    # the working directory once instead (appended last, so it cannot shadow
    # anything that already resolves).
    cwd = os.getcwd()
    if cwd not in sys.path:
        sys.path.append(cwd)

    # Imports stay interleaved with the calls, exactly as before.
    from sost_g import SOST
    SOST(f_name, w_size)
    from fisher_p import FI
    FI(f_name, w_size, w_incre)
    from smooth_h import FI_smooth
    FI_smooth(f_name, sm_step, w_size, xtick_step)


# --- Gap filling on the combined table --------------------------------------
# Coerce to numeric FIRST, then interpolate ONCE per column. The original did
# this inside the site loop, interpolating before coercion, so each later site
# was exported from a table that had been interpolated one more time than the
# previous one (limit=8 fills further into a gap on every pass).
# NOTE(review): interpolation still runs over the rows of ALL sites in table
# order, as in the original -- confirm whether per-site filling is intended.
for col in cols:
    numerico = pd.to_numeric(df_res_full[col], errors='coerce')
    relleno = numerico.interpolate(method='linear', axis=0, limit_direction='both', limit=8)
    df_res_full[col] = pd.to_numeric(relleno, downcast="float", errors='coerce')

# --- Site selection (loop-invariant) ----------------------------------------
df_Metztitlan = df_sit[df_sit["CUERPO DE AGUA"].str.contains("METZTITLAN", na=False)]
df_cuerpo_de_agua = df_Metztitlan
index_namecols = df_Metztitlan.iloc[:, :]

for i in range(3):  # position of the site among the METZTITLAN rows
    nombre_sitio = df_Metztitlan["NOMBRE DEL SITIO"].array[i]
    key_array_df = df_cuerpo_de_agua["CLAVE SITIO"].array[i]

    # "<column name>: <value>" captions for this site. They are not used in
    # this section; kept because later cells may read them.
    #subtitulo = index_namecols.columns[1]+': '+nombre_sitio
    subtitulo_2 = index_namecols.columns[2] + ': ' + index_namecols.iloc[i, 2]
    subtitulo_5 = index_namecols.columns[5] + ': ' + index_namecols.iloc[i, 5]
    subtitulo_6 = index_namecols.columns[6] + ': ' + index_namecols.iloc[i, 6]
    subtitulo_7 = index_namecols.columns[7] + ': ' + index_namecols.iloc[i, 7]
    subtitulo_8 = index_namecols.columns[8] + ': ' + index_namecols.iloc[i, 8]
    subtitulo_9 = index_namecols.columns[9] + ': ' + index_namecols.iloc[i, 9]
    subtitulo_10 = index_namecols.columns[10] + ': ' + index_namecols.iloc[i, 10]
    subtitulo_11 = index_namecols.columns[11] + ': ' + index_namecols.iloc[i, 11]

    # Rows belonging to this site in each table. serie_2019 is recomputed
    # here: the original reused the value left over from the earlier loop,
    # i.e. always the LAST site, which made the threshold below wrong for
    # the other sites.
    serie_2018 = df_res_2018[df_res_2018["CLAVE SITIO"].str.contains(key_array_df, na=False)]
    serie_2019 = df_res_2019[df_res_2019["CLAVE SITIO"].str.contains(key_array_df, na=False)]
    serie_2018_full = df_res_full[df_res_full["CLAVE SITIO"].str.contains(key_array_df, na=False)]

    for col in cols:
        df_rows = df_eti[df_eti['CLAVE PARÁMETRO'] == col]
        # First matching label row, second column.
        # NOTE(review): presumably 'NOMBRE DEL PARÁMETRO' -- confirm.
        titulo = df_rows.iloc[0, 1]

        # Same rule as the collection loop: at least 10 raw (non-interpolated)
        # observations for this site across both periods.
        n_obs = serie_2018[col].dropna().shape[0] + serie_2019[col].dropna().shape[0]
        if n_obs < 10:
            continue

        # Export "<date>,<value>" rows without header, then run the pipeline
        # on that file.
        f_name = titulo + '_' + nombre_sitio
        serie_2018_full[['FECHA', col]].to_csv(f_name + '.csv', index=False, header=False)

        Time = datetime.datetime.now()
        main(f_name, w_size, w_incre, xtick_step)
        # Drop the intermediate file before the next series.
        # NOTE(review): presumably written by the FI step -- confirm.
        os.remove('FI.csv')
        #print ('Total time taken-',datetime.datetime.now()-Time)
# Collect all .png files in the working directory. glob is used instead of
# the `!ls *.png` shell escape: it works on every platform, and it returns an
# empty list when nothing matches, whereas `ls` would emit an error message.
import glob
import IPython.display as dp

fs = sorted(glob.glob("*.png"))

# Build the list of image objects.
images = [dp.Image(filename=ea, format='png') for ea in fs]

# Display all images.
for ea in images:
    dp.display_png(ea)