In [1]:
import ConfigParser
import pandas as pd
import datetime
from datetime import datetime
import calendar
import matplotlib.pyplot as plt
%matplotlib inline
# Load the Airavata server endpoint from the local properties file.
CONFIG_FILE = 'cli.properties'
conf = ConfigParser.RawConfigParser()
# read() silently skips files it cannot open and returns the list of files it
# actually parsed, so an empty result means the settings were never loaded.
# Failing here gives a clearer error than the NoSectionError that conf.get()
# would otherwise raise below.
if not conf.read(CONFIG_FILE):
    raise IOError("Could not read configuration file: %s" % CONFIG_FILE)
hostName = conf.get('AiravataServer', 'host')
# Kept as the raw string from the file; it is converted with int() where the
# client object is created.
port = conf.get('AiravataServer', 'port')
In [2]:
from airavata_cli import AiravataCLI
# Build the client from the host/port read from the properties file; the port
# is stored as a string there, hence the int() conversion.
airavata_cli = AiravataCLI(hostName, int(port))
# printVersion() writes its banner itself and returns None (the recorded
# output of the old cell ended with a stray "None"), so it is called directly
# instead of being wrapped in print().
airavata_cli.printVersion()
0.16.0
0.16.0

Welcome to Airavata CLI v0.0.1 - Wirtten in python


None
In [3]:
## Making sure we are connected to the right gateway
In [4]:
# Display whatever get_gatewaylist() returns. In the recorded run this is a
# one-element list holding the Gateway object for 'Ultrascan_Production'.
airavata_cli.get_gatewaylist()
Out[4]:
[Gateway(gatewayId='Ultrascan_Production', emailAddress=None, domain=None, gatewayName='Ultrascan_Production')]
In [5]:
## List of Resources the Gateway uses
In [6]:
# Show the entries of the mapping returned by computer_resources().
# The recorded output is a list of (resource id, resource name) tuples.
airavata_cli.computer_resources().items()
Out[6]:
[('alamo.uthscsa.edu_4793b5cc-b991-4e43-b82d-17163b64ef29',
  'alamo.uthscsa.edu'),
 ('Jureca_32098185-4396-4c11-afb7-26e991a03476', 'Jureca'),
 ('comet.sdsc.edu_f24b0bba-5230-498d-97e2-46a975ee035b', 'comet.sdsc.edu'),
 ('gordon.sdsc.edu_f9363997-4614-477f-847e-79d262ee8ef7', 'gordon.sdsc.edu'),
 ('ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aad1f8310ecf',
  'ls5.tacc.utexas.edu'),
 ('stampede.tacc.xsede.org_bf7958ae-f9d4-468b-b146-a201fb89bf12',
  'stampede.tacc.xsede.org')]
In [7]:
# Tabulate the resource mapping: one row per (id, name) pair.
resource_pairs = list(airavata_cli.computer_resources().items())
compute_resources = pd.DataFrame(resource_pairs, columns=["Id", "Name"])
compute_resources
Out[7]:
Id Name
0 alamo.uthscsa.edu_4793b5cc-b991-4e43-b82d-1716... alamo.uthscsa.edu
1 Jureca_32098185-4396-4c11-afb7-26e991a03476 Jureca
2 comet.sdsc.edu_f24b0bba-5230-498d-97e2-46a975e... comet.sdsc.edu
3 gordon.sdsc.edu_f9363997-4614-477f-847e-79d262... gordon.sdsc.edu
4 ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aa... ls5.tacc.utexas.edu
5 stampede.tacc.xsede.org_bf7958ae-f9d4-468b-b14... stampede.tacc.xsede.org
In [8]:
## Some other custom functions which can be created
In [9]:
#airavata_cli.list_of_applications('Ultrascan_Production')
In [10]:
# Display the result of module_descriptions() for 'Ultrascan_Production'
# (presumably the gateway id - confirm against AiravataCLI). The recorded
# output is a list of ApplicationModule objects.
airavata_cli.module_descriptions('Ultrascan_Production')     
Out[10]:
[ApplicationModule(appModuleName='Ultrascan', appModuleVersion='Ultrascan Application', appModuleId='Ultrascan_82282f1e-284f-4999-9beb-4620c485b03d', appModuleDescription=''),
 ApplicationModule(appModuleName='Ultrascan_Unicore', appModuleVersion='', appModuleId='Ultrascan_Unicore_2471953d-5d87-4ffc-b0e6-b06c86c6206d', appModuleDescription='Ultrascan Unicore Application')]
In [11]:
## Setting the time parameters
In [12]:
def _epoch_seconds(moment):
    """Return whole seconds since the Unix epoch, reading `moment` as UTC."""
    return calendar.timegm(moment.timetuple())


# Reporting window: both bounds are naive datetimes converted as if in UTC.
start = datetime(2015, 7, 5, 15, 15)
end = datetime(2016, 7, 17, 11, 59)
fromTime = _epoch_seconds(start)
toTime = _epoch_seconds(end)
In [13]:
## Getting the list of Experiments executed during the above mentioned period
In [14]:
# The second-resolution bounds are multiplied by 1000 before the call
# (presumably the API expects epoch milliseconds - the creation times it
# returns are 13-digit values; confirm against the Airavata API).
from_ms = fromTime * 1000
to_ms = toTime * 1000
ds = airavata_cli.experiment_statistics("Ultrascan_Production", from_ms, to_ms)
In [15]:
# (attribute on each experiment entry, column label), in output column order.
field_to_label = [
    ("userName", "User Name"),
    ("name", "Name"),
    ("statusUpdateTime", "Status Update"),
    ("resourceHostId", "Resource Host ID"),
    ("projectId", "Project ID"),
    ("creationTime", "Creation Time"),
    ("experimentId", "Experiment ID"),
    ("executionId", "Execution ID"),
    ("gatewayId", "Gateway ID"),
    ("experimentStatus", "Experiment Status"),
]
labels = [label for _, label in field_to_label]

# One row per entry of ds.allExperiments, one cell per attribute listed above.
All_Experiments = [
    [getattr(experiment, field) for field, _ in field_to_label]
    for experiment in ds.allExperiments
]
df = pd.DataFrame(data=All_Experiments, columns=labels)
df.head()
Out[15]:
User Name Name Status Update Resource Host ID Project ID Creation Time Experiment ID Execution ID Gateway ID Experiment Status
0 Daniel_Krzizike_550162c5-88f4-5624-cd19-114778... US3-AIRA None None Default_Project_4e1dede8-0925-47e6-b61c-966051... 1468618483000 US3-AIRA_36f4788f-240b-4119-aaed-18926be8165c Ultrascan_0ed937f6-26af-4c54-8064-3be082411e46 Ultrascan_Production None
1 Paul_Willard_098ba7b6-274c-9fb4-4915-f7ecc0c7cc1f US3-AIRA None ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aa... Default_Project_b884d629-32bf-4532-ae76-8366db... 1468616186000 US3-AIRA_408946e3-6104-48b3-a9dc-27ec03f45cb2 Ultrascan_0ed937f6-26af-4c54-8064-3be082411e46 Ultrascan_Production COMPLETED
2 Paul_Willard_098ba7b6-274c-9fb4-4915-f7ecc0c7cc1f US3-AIRA None ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aa... Default_Project_b884d629-32bf-4532-ae76-8366db... 1468615898000 US3-AIRA_270ed9fe-f0f9-4c00-936f-72c994cb2125 Ultrascan_0ed937f6-26af-4c54-8064-3be082411e46 Ultrascan_Production COMPLETED
3 Daniel_Krzizike_550162c5-88f4-5624-cd19-114778... US3-AIRA None ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aa... Default_Project_4e1dede8-0925-47e6-b61c-966051... 1468612459000 US3-AIRA_c23effff-bf70-4d77-968a-5982919eb3a0 Ultrascan_0ed937f6-26af-4c54-8064-3be082411e46 Ultrascan_Production EXECUTING
4 Daniel_Krzizike_550162c5-88f4-5624-cd19-114778... US3-AIRA None ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aa... Default_Project_4e1dede8-0925-47e6-b61c-966051... 1468612433000 US3-AIRA_2e65cd86-b4f2-4514-99c2-68c6cec880f4 Ultrascan_0ed937f6-26af-4c54-8064-3be082411e46 Ultrascan_Production EXECUTING
In [16]:
df.shape
Out[16]:
(4125, 10)
In [17]:
## Calculating percentage use of resources
In [18]:
# Count COMPLETED experiments per compute resource in one vectorised pass
# instead of five row-by-row iterrows() scans of the whole frame.
# Columns are addressed by label ('Experiment Status', 'Resource Host ID')
# rather than by position (row[9], row[3]); integer-position lookups on a
# label-indexed row are deprecated in recent pandas.
completed_by_host = (
    df.loc[df['Experiment Status'] == 'COMPLETED', 'Resource Host ID']
    .value_counts()
)

# .get(..., 0) yields zero for a resource with no matching rows, and int()
# keeps the results plain Python ints as before.
# NOTE: Jureca is not included in this breakdown.
ls5_cn = int(completed_by_host.get('ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aad1f8310ecf', 0))
stampede_cn = int(completed_by_host.get('stampede.tacc.xsede.org_bf7958ae-f9d4-468b-b146-a201fb89bf12', 0))
comet_cn = int(completed_by_host.get('comet.sdsc.edu_f24b0bba-5230-498d-97e2-46a975ee035b', 0))
gordon_cn = int(completed_by_host.get('gordon.sdsc.edu_f9363997-4614-477f-847e-79d262ee8ef7', 0))
alamo_cn = int(completed_by_host.get('alamo.uthscsa.edu_4793b5cc-b991-4e43-b82d-17163b64ef29', 0))
In [19]:
# Completed-experiment counts, in the same order as the host labels below.
slices = [ls5_cn, stampede_cn, comet_cn, gordon_cn, alamo_cn]
Hosts = ["lonestar", "stampede", "comet", "gordon", "alamo"]
cols = ['c', 'm', 'r', 'w', 'y']

# autopct prints each wedge's share with one decimal place.
pie_options = dict(
    labels=Hosts,
    colors=cols,
    startangle=90,
    shadow=False,
    autopct='%1.1f%%',
)
plt.pie(slices, **pie_options)
plt.title('Percentage Use by Resources')
plt.show()
In [20]:
## Percentage failed by resources
In [21]:
# Count FAILED experiments per compute resource in one vectorised pass
# instead of five row-by-row iterrows() scans of the whole frame.
# Columns are addressed by label ('Experiment Status', 'Resource Host ID')
# rather than by position (row[9], row[3]); integer-position lookups on a
# label-indexed row are deprecated in recent pandas.
failed_by_host = (
    df.loc[df['Experiment Status'] == 'FAILED', 'Resource Host ID']
    .value_counts()
)

# .get(..., 0) yields zero for a resource with no matching rows, and int()
# keeps the results plain Python ints as before.
# NOTE: Jureca is not included in this breakdown.
ls5_fn = int(failed_by_host.get('ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aad1f8310ecf', 0))
stampede_fn = int(failed_by_host.get('stampede.tacc.xsede.org_bf7958ae-f9d4-468b-b146-a201fb89bf12', 0))
comet_fn = int(failed_by_host.get('comet.sdsc.edu_f24b0bba-5230-498d-97e2-46a975ee035b', 0))
gordon_fn = int(failed_by_host.get('gordon.sdsc.edu_f9363997-4614-477f-847e-79d262ee8ef7', 0))
alamo_fn = int(failed_by_host.get('alamo.uthscsa.edu_4793b5cc-b991-4e43-b82d-17163b64ef29', 0))
In [22]:
# Failed-experiment counts, in the same order as the host labels below.
Hosts = ["lonestar", "stampede", "comet", "gordon", "alamo"]
cols = ['c', 'm', 'r', 'w', 'y']
slices = [ls5_fn, stampede_fn, comet_fn, gordon_fn, alamo_fn]

# Wedges start at 90 degrees; each is annotated with its percentage share.
plt.pie(slices, labels=Hosts, colors=cols, startangle=90, shadow=False,
        autopct='%1.1f%%')
plt.title('Percentage failure by Resources')
plt.show()
In [23]:
## Percentage cancelled by resources
In [24]:
# Count CANCELED experiments per compute resource in one vectorised pass
# instead of five row-by-row iterrows() scans of the whole frame.
# Columns are addressed by label ('Experiment Status', 'Resource Host ID')
# rather than by position (row[9], row[3]); integer-position lookups on a
# label-indexed row are deprecated in recent pandas.
canceled_by_host = (
    df.loc[df['Experiment Status'] == 'CANCELED', 'Resource Host ID']
    .value_counts()
)

# .get(..., 0) yields zero for a resource with no matching rows, and int()
# keeps the results plain Python ints as before.
# NOTE: Jureca is not included in this breakdown.
ls5_xn = int(canceled_by_host.get('ls5.tacc.utexas.edu_6dd67b08-30e5-4f74-bdd6-aad1f8310ecf', 0))
stampede_xn = int(canceled_by_host.get('stampede.tacc.xsede.org_bf7958ae-f9d4-468b-b146-a201fb89bf12', 0))
comet_xn = int(canceled_by_host.get('comet.sdsc.edu_f24b0bba-5230-498d-97e2-46a975ee035b', 0))
gordon_xn = int(canceled_by_host.get('gordon.sdsc.edu_f9363997-4614-477f-847e-79d262ee8ef7', 0))
alamo_xn = int(canceled_by_host.get('alamo.uthscsa.edu_4793b5cc-b991-4e43-b82d-17163b64ef29', 0))
In [25]:
# Canceled-experiment counts paired with their display label and wedge colour.
slices = [ls5_xn, stampede_xn, comet_xn, gordon_xn, alamo_xn]
cols = ['c', 'm', 'r', 'w', 'y']
Hosts = ["lonestar", "stampede", "comet", "gordon", "alamo"]

plt.pie(
    slices,
    labels=Hosts,
    colors=cols,
    autopct='%1.1f%%',  # percentage share, one decimal place
    shadow=False,
    startangle=90,
)
plt.title('Percentage canceled by Resources')
plt.show()

Number of Experiments Canceled by a user

In [26]:
# Number of CANCELED experiments submitted by one specific user.
# Boolean masks on the labelled columns replace the iterrows() scan and the
# positional row[0] / row[9] lookups, which recent pandas deprecates on
# label-indexed rows.
is_user = df['User Name'] == 'Borries_Demeler_02d0c21b-1adf-9414-c175-d005bb256320'
is_canceled = df['Experiment Status'] == 'CANCELED'
# int() keeps the result a plain Python int, as the original sum() produced.
number = int((is_user & is_canceled).sum())
number
Out[26]:
9

Sorting users by the number of experiments canceled (swap the status filter for 'FAILED' to rank failures instead)

In [27]:
# Users ranked by how many of their experiments ended up CANCELED.
canceled_users = df.loc[df['Experiment Status'] == 'CANCELED', 'User Name']
canceled_users.value_counts().sort_values(ascending=False)
Out[27]:
Daniel_Krzizike_550162c5-88f4-5624-cd19-1147783ec5ed        44
Victoria_Saenz_3963bfc5-9a55-d8f4-35f2-a98819ba7825         36
Uma_Muthurajan_912a2d20-e858-a4b4-fdbf-6cdf8ec8e182         18
Shaoxiong_Tian_f3c15677-e1d3-c894-7539-005f6df5e1b6         18
Borries_Demeler_02d0c21b-1adf-9414-c175-d005bb256320         9
Akash_Bhattacharya_65724131-4b32-bec4-5974-aa29b945deaf      9
Ge_Yu_8d9a3ed0-135a-c974-69f5-44beb56d182f                   6
Aysha Kinjo_Demeler_6804fe7e-35ce-f604-3905-bb73330ef346     4
Todd_Stone_8aee0ad6-f820-8584-f18a-9a0fa325fd34              4
Chris_Pierini_1c0bf9ca-2756-f454-355c-fbce45017f8b           4
Gary_Gorbet_84f540d7-3276-8894-a544-fae60062b41c             3
Borries_Demeler_0b30314f-70d9-74d4-7564-d9684b1a9a13         3
William_Dean_e199fd03-1b9a-a214-ad3f-1d25202091c9            3
Chris_Pierini_36b27c6b-f5ad-3794-6d81-0038d249d19b           2
Aysha_Demeler_09d108a4-4a6c-f884-e971-5ea68f402fde           1
Claudius_Mundoma_e1c6c320-ca98-f704-255c-b44fd6642c4d        1
KYUNG_SU_ad389f57-0594-bd44-3986-2ea1ebe42274                1
Daniel_Krzizike_84d7ac3a-a900-aad4-790e-1797c3e2110d         1
Borries_Demeler_7db4a2d1-66dc-6b44-3935-40bceaa3a9a3         1
Todd_Stone_1768d08e-c06c-7224-a58d-32bbd9d685fb              1
Name: User Name, dtype: int64

Experiments Created by the hour of the day

In [28]:
# Hour of day (UTC, 0-23) in which each experiment was created.
# 'Creation Time' holds epoch milliseconds, so the column is converted in one
# vectorised call instead of a per-row Python lambda around
# datetime.utcfromtimestamp (which newer Python versions deprecate).
df['hour_of_day'] = pd.to_datetime(df['Creation Time'], unit='ms').dt.hour
In [29]:
# Bar chart of experiment counts per creation hour, ordered 0..23 by sort_index().
# The chart type is passed as the `kind` keyword: current pandas rejects
# positional arguments to Series.plot with a TypeError.
df['hour_of_day'].value_counts().sort_index().plot(
    kind='bar', title='Experiments created by hour of day')
Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x9ac8198>
In [30]:
## Experiments failed by the hour of the day
In [31]:
# Bar chart of FAILED experiment counts per creation hour, ordered by hour.
# The chart type is passed as the `kind` keyword: current pandas rejects
# positional arguments to Series.plot with a TypeError.
df[df['Experiment Status'] == 'FAILED']['hour_of_day'].value_counts().sort_index().plot(
    kind='bar', title='Failed experiments by hour of day')
Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0x9f408d0>
In [32]:
## Experiments canceled by the hour of the day
In [33]:
# Bar chart of CANCELED experiment counts per creation hour, ordered by hour.
# The chart type is passed as the `kind` keyword: current pandas rejects
# positional arguments to Series.plot with a TypeError.
df[df['Experiment Status'] == 'CANCELED']['hour_of_day'].value_counts().sort_index().plot(
    kind='bar', title='Canceled experiments by hour of day')
Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0xa12eba8>
In [ ]: