import ConfigParser
import pandas as pd
import datetime
from datetime import datetime
import calendar
import matplotlib.pyplot as plt
%matplotlib inline
# Load the Airavata server endpoint from the local properties file.
conf = ConfigParser.RawConfigParser()
conf.read('cli.properties')
hostName, port = (
    conf.get('AiravataServer', 'host'),
    conf.get('AiravataServer', 'port'),
)

# The project-local client class is imported right before its first use.
from airavata_cli import AiravataCLI

# conf.get() returns text, so the port is converted to an integer for the client.
airavata_cli = AiravataCLI(hostName, int(port))
version_info = airavata_cli.printVersion()
print(version_info)
## Make sure we are connected to the right gateway.
# Bare expression: the result is not stored, so it is only visible when run
# interactively (e.g. as the last statement of a notebook cell).
airavata_cli.get_gatewaylist()
## List of resources the gateway uses
# computer_resources() is queried twice: this first call only shows the raw pairs.
airavata_cli.computer_resources().items()
# Tabulate the pairs as a two-column frame labelled "Id" and "Name".
# NOTE(review): presumably the mapping is resource id -> resource name; confirm against AiravataCLI.
compute_resources = pd.DataFrame(list(airavata_cli.computer_resources().items()), columns=["Id", "Name"])
compute_resources
## Other helper calls offered by the client for the 'Ultrascan_Production' gateway
#airavata_cli.list_of_applications('Ultrascan_Production')
airavata_cli.module_descriptions('Ultrascan_Production')
## Setting the time parameters
# Reporting window. The datetimes are naive and are interpreted as UTC,
# because calendar.timegm() converts a UTC time tuple into epoch seconds.
start = datetime(2015, 7, 5, 15, 15)
end = datetime(2016, 7, 17, 11, 59)
fromTime = calendar.timegm(start.timetuple())
toTime = calendar.timegm(end.timetuple())

## Getting the list of experiments executed during the above period
# The bounds are scaled from epoch seconds to milliseconds before the call.
# NOTE(review): presumably experiment_statistics() expects millisecond timestamps; confirm.
ds = airavata_cli.experiment_statistics("Ultrascan_Production", fromTime * 1000, toTime * 1000)

# One row per experiment summary; the field order must match `labels` below.
All_Experiments = [
    [
        exp.userName,
        exp.name,
        exp.statusUpdateTime,
        exp.resourceHostId,
        exp.projectId,
        exp.creationTime,
        exp.experimentId,
        exp.executionId,
        exp.gatewayId,
        exp.experimentStatus,
    ]
    for exp in ds.allExperiments
]
labels = [
    "User Name",
    "Name",
    "Status Update",
    "Resource Host ID",
    "Project ID",
    "Creation Time",
    "Experiment ID",
    "Execution ID",
    "Gateway ID",
    "Experiment Status",
]
df = pd.DataFrame(data=All_Experiments, columns=labels)
# Quick sanity checks (only displayed when run interactively).
df.head()
df.shape
## Calculating percentage use of resources
# Resource host of every COMPLETED experiment. Columns are selected by label
# rather than by position, so the counts do not depend on column order, and the
# frame is scanned once instead of once per resource.
completed_hosts = df.loc[df['Experiment Status'] == 'COMPLETED', 'Resource Host ID']
# int() keeps the counts as plain Python integers, as the original sum() produced.
ls5_cn = int((completed_hosts == 'ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aad1f8310ecf').sum())
stampede_cn = int((completed_hosts == 'stampede.tacc.xsede.org_bf7958ae-f9d4-468b-b146-a201fb89bf12').sum())
comet_cn = int((completed_hosts == 'comet.sdsc.edu_f24b0bba-5230-498d-97e2-46a975ee035b').sum())
gordon_cn = int((completed_hosts == 'gordon.sdsc.edu_f9363997-4614-477f-847e-79d262ee8ef7').sum())
# The Jureca resource is currently not counted:
#jureca_cn = int((completed_hosts == 'Jureca_32098185-4396-4c11-afb7-26e991a03476').sum())
alamo_cn = int((completed_hosts == 'alamo.uthscsa.edu_4793b5cc-b991-4e43-b82d-17163b64ef29').sum())

# slices, cols and Hosts are parallel lists: one entry per resource, same order.
slices = [ls5_cn, stampede_cn, comet_cn, gordon_cn, alamo_cn]
cols = ['c', 'm', 'r', 'w', 'y']
Hosts = ["lonestar", "stampede", "comet", "gordon", "alamo"]
plt.pie(
    slices,
    labels=Hosts,
    colors=cols,
    startangle=90,
    shadow=False,
    autopct='%1.1f%%',
)
plt.title('Percentage Use by Resources')
plt.show()
## Percentage failed by resources
# Resource host of every FAILED experiment. Columns are selected by label
# rather than by position, so the counts do not depend on column order, and the
# frame is scanned once instead of once per resource.
failed_hosts = df.loc[df['Experiment Status'] == 'FAILED', 'Resource Host ID']
# int() keeps the counts as plain Python integers, as the original sum() produced.
ls5_fn = int((failed_hosts == 'ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aad1f8310ecf').sum())
stampede_fn = int((failed_hosts == 'stampede.tacc.xsede.org_bf7958ae-f9d4-468b-b146-a201fb89bf12').sum())
comet_fn = int((failed_hosts == 'comet.sdsc.edu_f24b0bba-5230-498d-97e2-46a975ee035b').sum())
gordon_fn = int((failed_hosts == 'gordon.sdsc.edu_f9363997-4614-477f-847e-79d262ee8ef7').sum())
# The Jureca resource is currently not counted:
#jureca_fn = int((failed_hosts == 'Jureca_32098185-4396-4c11-afb7-26e991a03476').sum())
alamo_fn = int((failed_hosts == 'alamo.uthscsa.edu_4793b5cc-b991-4e43-b82d-17163b64ef29').sum())

# slices, cols and Hosts are parallel lists: one entry per resource, same order.
slices = [ls5_fn, stampede_fn, comet_fn, gordon_fn, alamo_fn]
cols = ['c', 'm', 'r', 'w', 'y']
Hosts = ["lonestar", "stampede", "comet", "gordon", "alamo"]
plt.pie(
    slices,
    labels=Hosts,
    colors=cols,
    startangle=90,
    shadow=False,
    autopct='%1.1f%%',
)
plt.title('Percentage failure by Resources')
plt.show()
## Percentage canceled by resources
# Resource host of every CANCELED experiment. Columns are selected by label
# rather than by position, so the counts do not depend on column order, and the
# frame is scanned once instead of once per resource.
canceled_hosts = df.loc[df['Experiment Status'] == 'CANCELED', 'Resource Host ID']
# int() keeps the counts as plain Python integers, as the original sum() produced.
ls5_xn = int((canceled_hosts == 'ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aad1f8310ecf').sum())
stampede_xn = int((canceled_hosts == 'stampede.tacc.xsede.org_bf7958ae-f9d4-468b-b146-a201fb89bf12').sum())
comet_xn = int((canceled_hosts == 'comet.sdsc.edu_f24b0bba-5230-498d-97e2-46a975ee035b').sum())
gordon_xn = int((canceled_hosts == 'gordon.sdsc.edu_f9363997-4614-477f-847e-79d262ee8ef7').sum())
# The Jureca resource is currently not counted:
#jureca_xn = int((canceled_hosts == 'Jureca_32098185-4396-4c11-afb7-26e991a03476').sum())
alamo_xn = int((canceled_hosts == 'alamo.uthscsa.edu_4793b5cc-b991-4e43-b82d-17163b64ef29').sum())

# slices, cols and Hosts are parallel lists: one entry per resource, same order.
slices = [ls5_xn, stampede_xn, comet_xn, gordon_xn, alamo_xn]
cols = ['c', 'm', 'r', 'w', 'y']
Hosts = ["lonestar", "stampede", "comet", "gordon", "alamo"]
plt.pie(
    slices,
    labels=Hosts,
    colors=cols,
    startangle=90,
    shadow=False,
    autopct='%1.1f%%',
)
plt.title('Percentage canceled by Resources')
plt.show()
## Canceled experiments by user
# User name of every CANCELED experiment, selected by column label rather than
# by position so the result does not depend on column order.
canceled_users = df.loc[df['Experiment Status'] == 'CANCELED', 'User Name']
# Number of CANCELED experiments belonging to one specific user; int() keeps
# the count a plain Python integer, as the original sum() produced.
number = int((canceled_users == 'Borries_Demeler_02d0c21b-1adf-9414-c175-d005bb256320').sum())
number
# CANCELED experiments per user, largest count first (displayed interactively).
canceled_users.value_counts().sort_values(ascending=False)
# Hour of day (0-23, UTC) at which each experiment was created. 'Creation Time'
# is treated as an epoch timestamp in milliseconds, hence the division by 1000.
df['hour_of_day'] = df['Creation Time'].apply(
    lambda millis: datetime.utcfromtimestamp(millis / 1000).hour
)

# `kind` is passed by keyword: Series.plot() rejects positional arguments in
# current pandas. Each chart is followed by plt.show() so the three bar charts
# land on separate figures instead of being drawn over one another.

## Experiments created by the hour of the day
df['hour_of_day'].value_counts().sort_index().plot(kind='bar')
plt.show()

## Experiments failed by the hour of the day
df[df['Experiment Status'] == 'FAILED']['hour_of_day'].value_counts().sort_index().plot(kind='bar')
plt.show()

## Experiments canceled by the hour of the day
df[df['Experiment Status'] == 'CANCELED']['hour_of_day'].value_counts().sort_index().plot(kind='bar')
plt.show()