{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"
\n",
"\n",
"\n"
],
"metadata": {
"id": "vw9RVGBXhZbg"
}
},
{
"cell_type": "markdown",
"source": [
"# Import library, Set Working Directory and Login Info"
],
"metadata": {
"id": "Hk67Zto9LD-8"
}
},
{
"cell_type": "markdown",
"source": [
"## Install OpenSilex in the current Environment\n",
"https://github.com/OpenSILEX/opensilexClientToolsPython"
],
"metadata": {
"id": "SavIOXtCDZXm"
}
},
{
"cell_type": "code",
"source": [
"!pip install git+https://github.com/OpenSILEX/opensilexClientToolsPython.git@1.3.3"
],
"metadata": {
"id": "Gl5aMYKKZh4h"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Import all requiered library"
],
"metadata": {
"id": "7uo_KgeWDxtw"
}
},
{
"cell_type": "code",
"source": [
"### Import library ##########################################################################################################\n",
"import os\n",
"import opensilexClientToolsPython as osC\n",
"import pandas as pd\n",
"import sys\n",
"import datetime\n",
"import time\n",
"from lxml import etree as ET\n",
"import json\n",
"from tqdm import tqdm\n",
"import yaml\n",
"from google.colab import drive\n",
"\n",
"print(\"Library Imported\")"
],
"metadata": {
"id": "lPw9hiTzEz1v"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Setting Up the Working Environment\n",
"Mount Google Drive (Enabling the recovery of data generated in previous sessions.)\n",
"\n",
"Clone PHIS-Related Data from GitHub\n",
"\n",
"Set the Working Directory\n"
],
"metadata": {
"id": "AFsrbciiPCre"
}
},
{
"cell_type": "code",
"source": [
"### Mount Google Drive Folder ###############################################################################################\n",
"drive.mount('/content/drive')\n",
"print('Google Drive Mounted')\n",
"\n",
"### Clone all Data from GitHub ##############################################################################################\n",
"user='NaPPI-T'\n",
"token='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'\n",
"!git clone https://$user:$token@github.com/NaPPI-T/2024_PHIS_PHENET.git \"/content/drive/My Drive/Leaf_CNN/yaml\"\n",
"\n",
"### Set Database wd #########################################################################################################\n",
"wd = '/content/drive/My Drive/Leaf_CNN'\n",
"os.chdir(wd)\n",
"display(f'Working Directoty: {wd}')"
],
"metadata": {
"id": "XP9DUNtjO97i"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Connect to the PHIS Demo instance\n",
"https://phis.emphasis.fedcloud.eu/egi-demo/app/\n",
"\n",
"login: trainees@opensilex.org\n",
"\n",
"passowrd: trainees"
],
"metadata": {
"id": "KQJeR-DQEB76"
}
},
{
"cell_type": "code",
"source": [
"### Get Login Info ##########################################################################################################\n",
"with open (os.path.join(wd, 'yaml', 'Demo-login.yaml'), 'r') as stream:\n",
" login=yaml.safe_load(stream)\n",
"\n",
"### Connect to PHIS Database ################################################################################################\n",
"Py_Client = osC.ApiClient()\n",
"Py_Client.connect_to_opensilex_ws(identifier=login[\"Identifier\"],\n",
" password=login[\"Password\"],\n",
" host=login[\"Host\"])\n",
"#print(Py_Client.default_headers['Authorization'])\n",
"if Py_Client.default_headers:\n",
" print(\"Connected\")"
],
"metadata": {
"id": "doYEwiMlM_g9"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Get Experiment Info\n",
"
\n"
],
"metadata": {
"id": "kW6ArxtzETGy"
}
},
{
"cell_type": "code",
"source": [
"prefix='SP'\n",
"\n",
"### Set Experiment Name #####################################################################################################\n",
"NameExp = prefix + \"_Leaf-Detect\"\n",
"print(f'Experiment Name is \"{NameExp}\"\\n')\n",
"\n",
"\n",
"### Get Exp Info ############################################################################################################\n",
"with open (os.path.join(wd, 'yaml', 'ExpInfo.yaml'), 'r') as stream:\n",
" ExpInfo=yaml.safe_load(stream)\n",
"\n",
"display(ExpInfo)"
],
"metadata": {
"id": "Wb6lHPHFNYvc"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Get or Create Experiment Name&URI"
],
"metadata": {
"id": "LGNsG593a__O"
}
},
{
"cell_type": "code",
"source": [
"### Get or Create Scientific Objects Name&URI ###############################################################################\n",
"Exp_Api = osC.ExperimentsApi(Py_Client)\n",
"Exp_Src = Exp_Api.search_experiments(name=NameExp)[\"result\"]\n",
"\n",
"NameExp_uri = {}\n",
"if Exp_Src:\n",
" NameExp_uri.update({NameExp: Exp_Src[0].uri})\n",
" print(\"Experiment URI:\\n{}\".format(NameExp_uri[NameExp]))\n",
" del Exp_Api, Exp_Src\n",
"\n",
"else:\n",
" # List Exp Mandatory Info ###############################################################################################\n",
" ObjectiveExp = ExpInfo['Objective']\n",
" if ObjectiveExp != [\"\"]:\n",
" print(\"Objective: {}\".format(ObjectiveExp))\n",
" else:\n",
" sys.exit(\"Objective Missing\")\n",
"\n",
" StartExp = ExpInfo['Start Date']\n",
" #print(ExpInfo['Start Date'])\n",
" if StartExp != [\"\"]:\n",
" print(\"Start Date: {}\".format(StartExp))\n",
" else:\n",
" sys.exit(\"Starting Date Missing\")\n",
" print('#'*50)\n",
"\n",
" # Get Description Name&URI ##############################################################################################\n",
" DescriptionExp = ExpInfo['Description']\n",
" print(\"Description: {}\".format(str(DescriptionExp)))\n",
"\n",
" # Get End Date Name&URI #################################################################################################\n",
" EndExp = ExpInfo['End Date']\n",
" #print(\"End Date: {}\".format(EndExp))\n",
" if EndExp != None:\n",
" print(\"Start Date: {}\".format(EndExp))\n",
" else:\n",
" print(\"Ending Date Missing\")\n",
"\n",
" # Get Is_Public Info ####################################################################################################\n",
" Is_Public = ExpInfo['Is_Public']\n",
" print(\"Is_Public: {}\".format(str(Is_Public)))\n",
" print('#'*50)\n",
"\n",
" # Get Organisation Name&URI #############################################################################################\n",
" Org_Api = osC.OrganizationsApi(Py_Client)\n",
" Organisation = ExpInfo['organisations']\n",
" #print(\"Organization: {}\".format(str(Organisation)))\n",
" Organisation_uri = {}\n",
" if Organisation == None:\n",
" print(\"Organisation Missing\")\n",
" ls_Organisation=None\n",
" else:\n",
" Organisation = Organisation.split(\", \")\n",
" print(\"Organisation: {}\".format(str(Organisation)))\n",
" for organisation in Organisation:\n",
" Org_Src = Org_Api.search_organizations(pattern=organisation)[\"result\"]\n",
" if Org_Src:\n",
" Organisation_uri.update({organisation: Org_Src[0].uri})\n",
" print(\"{} URI: {}\".format(organisation, Org_Src[0].uri))\n",
" ls_Organisation=list(Organisation_uri.values())\n",
" else:\n",
" print(\"{}: Unknown Organisation\".format(organisation))\n",
" del organisation, Org_Src\n",
" del Org_Api, Organisation\n",
" print('#'*50)\n",
"\n",
" # Get Groups Name&URI ###################################################################################################\n",
" Sec_Api = osC.SecurityApi(Py_Client)\n",
" Groups = ExpInfo['groups']\n",
" #print(\"Groups: {}\".format(str(Groups)))\n",
" Groups_uri = {}\n",
" if Groups == None:\n",
" print(\"Group Missing\")\n",
" ls_Groups=None\n",
" else:\n",
" Groups = Groups.split(\", \")\n",
" for group in Groups:\n",
" Sec_Src = Sec_Api.search_groups(name=group)[\"result\"]\n",
" if Sec_Src:\n",
" Groups_uri.update({group: Sec_Src[0].uri})\n",
" print(\"{} URI: {}\".format(group, Sec_Src[0].uri))\n",
" ls_Groups=list(Groups_uri.values())\n",
" else:\n",
" print(\"{}: Unknown Group\".format(group))\n",
" del group, Sec_Src\n",
" del Sec_Api, Groups\n",
" print('#'*50)\n",
"\n",
" # Get Scientific Supervisor Name&URI ####################################################################################\n",
" Sec_Api = osC.SecurityApi(Py_Client)\n",
" Scientific_Supervisors = ExpInfo['scientific_supervisors']\n",
" #print(\"Scientific Supervisors: {}\".format(str(Scientific_Supervisors)))\n",
" Scientific_Supervisors_uri = {}\n",
" if Scientific_Supervisors == None:\n",
" print(\"Scientific Supervisors Missing\")\n",
" ls_Scientific_Supervisors=None\n",
" else:\n",
" Scientific_Supervisors = Scientific_Supervisors.split(\", \")\n",
" for scisup in Scientific_Supervisors:\n",
" Sec_Src = Sec_Api.search_users(name=scisup)[\"result\"]\n",
" if Sec_Src:\n",
" Scientific_Supervisors_uri.update({scisup: Sec_Src[0].uri})\n",
" print(\"{} URI: {}\".format(scisup, Sec_Src[0].uri))\n",
" ls_Scientific_Supervisors=list(Scientific_Supervisors_uri.values())\n",
" else:\n",
" print(\"{}: Unknown Scientific Supervisors\".format(scisup))\n",
" del scisup, Sec_Src\n",
" del Sec_Api, Scientific_Supervisors\n",
" print('#'*50)\n",
"\n",
" # Get Technical Supervisor Name&URI #####################################################################################\n",
" Sec_Api = osC.SecurityApi(Py_Client)\n",
" Technical_Supervisors = ExpInfo['technical_supervisors']\n",
" #print(\"Technical Supervisors: {}\".format(str(Technical_Supervisors)))\n",
" Technical_Supervisors_uri = {}\n",
" if Technical_Supervisors == None:\n",
" print(\"Technical Supervisors Missing\")\n",
" ls_Technical_Supervisors=None\n",
"\n",
" else:\n",
" Technical_Supervisors = Technical_Supervisors.split(\", \")\n",
" for techsup in Technical_Supervisors:\n",
" Sec_Src = Sec_Api.search_users(name=techsup)[\"result\"]\n",
" if Sec_Src:\n",
" Technical_Supervisors_uri.update({techsup: Sec_Src[0].uri})\n",
" print(\"{} URI: {}\".format(techsup, Sec_Src[0].uri))\n",
" ls_Technical_Supervisors=list(Technical_Supervisors_uri.values())\n",
"\n",
" else:\n",
" print(\"{}: Unknown Technical Supervisors\".format(techsup))\n",
" del techsup, Sec_Src\n",
" del Sec_Api, Technical_Supervisors\n",
" print('#'*50)\n",
"\n",
" # Create Experiment #####################################################################################################\n",
" body = osC.ExperimentCreationDTO(\n",
" name=NameExp,\n",
" start_date=StartExp,\n",
" end_date=EndExp,\n",
" description=DescriptionExp,\n",
" objective=ObjectiveExp,\n",
" organisations=ls_Organisation,\n",
" scientific_supervisors=ls_Scientific_Supervisors,\n",
" technical_supervisors=ls_Technical_Supervisors,\n",
" groups=ls_Groups,\n",
" is_public=Is_Public)\n",
" Api_Resp = Exp_Api.create_experiment(body=body,)\n",
" print(\"Experiment Creation: {}\".format(str(Api_Resp[\"metadata\"][\"datafiles\"])))\n",
"\n",
" # Get Experiment Name&URI ###############################################################################################\n",
" Exp_Src = Exp_Api.search_experiments(name=NameExp)\n",
" NameExp_uri.update({NameExp: Exp_Src[\"result\"][0].uri})\n",
" print( \"{} URI: {}\".format(NameExp, NameExp_uri[NameExp]))\n",
" del Exp_Api, body, Api_Resp, DescriptionExp, ObjectiveExp, StartExp, EndExp, Is_Public, Organisation_uri, Groups_uri, Scientific_Supervisors_uri, Technical_Supervisors_uri"
],
"metadata": {
"id": "ZYb3q7oydBUF"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Reference Scientific Object\n",
"Your observed object (e.g., Plant, Leaf, Plot) is linked to several metadata elements that provide essential context and description:\n",
"\n",
"\n",
"\n",
"* Has Germplasm: Specifies the genetic material or cultivar used in the\n",
"experiment.\n",
"* Has Factor Level: Describes the specific treatment or environmental condition applied (e.g., drought level, fertilizer type, temperature).\n",
"* Has Facility: Indicates the locationl or controlled environment where the observation was conducted.\n",
"* ...\n",
"\n",
"These metadata help ensure that the scientific object is fully described."
],
"metadata": {
"id": "bf-UXs9ylibP"
}
},
{
"cell_type": "markdown",
"source": [
"
"
],
"metadata": {
"id": "5sNNMalJjOGg"
}
},
{
"cell_type": "markdown",
"source": [
"## Get or Create Germplasms Name&URI"
],
"metadata": {
"id": "Ga7wEyFYbNYa"
}
},
{
"cell_type": "code",
"source": [
"### Get Germplasms Info ######################################################################################################\n",
"with open (os.path.join(wd, 'yaml', 'Germplasm.yaml'), 'r') as stream:\n",
" Germplasms=yaml.safe_load(stream)\n",
"\n",
"display(Germplasms)"
],
"metadata": {
"id": "aiqA81qgqCtt"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"### Get Subtaxa Name&URI ####################################################################################################\n",
"Germ_Api = osC.GermplasmApi(Py_Client)\n",
"\n",
"Species_uri={}\n",
"\n",
"for germplasm, rdftype in Germplasms.items():\n",
" Germ_Src = Germ_Api.search_germplasm(name=germplasm,\n",
" rdf_type=rdftype)[\"result\"]\n",
" if Germ_Src:\n",
" Species_uri.update({germplasm: Germ_Src[0].uri})\n",
" print(\"{}, {} URI: {}\".format(germplasm, rdftype, Species_uri[germplasm]))\n",
"\n",
" else:\n",
" check_only = False\n",
" body = osC.GermplasmCreationDTO(\n",
" name=germplasm,\n",
" rdf_type=rdftype)\n",
"\n",
" Germ_Api.create_germplasm(body=body, check_only=check_only)\n",
" Germ_Src = Germ_Api.search_germplasm(name=germplasm,\n",
" rdf_type=rdftype)[\"result\"]\n",
" Species_uri.update({germplasm: Germ_Src[0].uri})\n",
" print(\"{}, {} Creation: {}\".format(germplasm, rdftype, Germ_Src[0].uri))\n",
"del germplasm, Germ_Src"
],
"metadata": {
"id": "66NUcXBjwxFT"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Get or Create Factors&Levels Names&URI\n",
"
"
],
"metadata": {
"id": "8n8C-jFMbIa3"
}
},
{
"cell_type": "code",
"source": [
"### Get Exp Info ############################################################################################################\n",
"with open (os.path.join(wd, 'yaml', 'Factors.yaml'), 'r') as stream:\n",
" Factors=yaml.safe_load(stream)\n",
"\n",
"display(Factors)"
],
"metadata": {
"id": "lwb-f8EGkC1e"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"### Search Factors ##########################################################################################################\n",
"Fac_Api = osC.FactorsApi(Py_Client)\n",
"Factors_uri = {}\n",
"Factors_Levels_uri = {}\n",
"for factor in Factors:\n",
" Fac_Src = Fac_Api.search_factors(name=factor,\n",
" experiment=NameExp_uri[NameExp])[\"result\"]\n",
"\n",
" # Get Existing Factors Name&URI #########################################################################################\n",
" if Fac_Src:\n",
" Factors_uri.update({factor: Fac_Src[0].uri})\n",
" del Fac_Src\n",
"\n",
" # Create Factors ########################################################################################################\n",
" else:\n",
" # Creation of LevelsDTO #############################################################################################\n",
" lvls = {}\n",
" for i in Factors:\n",
" lvl = []\n",
" for j in Factors[i]['Levels']:\n",
" lvl.append(osC.FactorLevelCreationDTO(name=j))\n",
" lvls[i] = lvl\n",
"\n",
" # Creation of FactorDTO #############################################################################################\n",
" bodies = []\n",
" for i in lvls:\n",
" body = osC.FactorCreationDTO(name=i,\n",
" levels=lvls[i],\n",
" experiment=NameExp_uri[NameExp],\n",
" description=Factors[i]['Description'])\n",
" bodies.append(body)\n",
"\n",
" # Creation of Factors ###############################################################################################\n",
" for i in bodies:\n",
" Api_Resp = Fac_Api.create_factor(body=i,)\n",
" print(\"Factors Creation: {}\".format(str(Api_Resp[\"metadata\"][\"datafiles\"])))\n",
" del lvls, lvl, bodies, body, Api_Resp, Fac_Src, i, j\n",
"\n",
" # Get New Factors Name&URI ##########################################################################################\n",
" Fac_Src = Fac_Api.search_factors(name=factor,\n",
" experiment=NameExp_uri[NameExp])[\"result\"]\n",
" Factors_uri.update({factor: Fac_Src[0].uri})\n",
"\n",
"## Get Factors Levels Name&URI ##############################################################################################\n",
"for fac_uri in Factors_uri.values():\n",
" Fac_Get = Fac_Api.get_factor_levels(uri=fac_uri)[\"result\"]\n",
" for lvl in Fac_Get:\n",
" Factors_Levels_uri.update({lvl.name: lvl.uri})\n",
"del factor, fac_uri, Fac_Get, lvl\n",
"\n",
"## Print Factors Name&URI ###################################################################################################\n",
"for factor in Factors_uri:\n",
" print(\"{} URI: {}\".format(factor, Factors_uri[factor]))\n",
"for lvl in Factors_Levels_uri:\n",
" print(\"{} URI: {}\".format(lvl, Factors_Levels_uri[lvl]))\n",
"del factor, lvl, Fac_Api, Factors"
],
"metadata": {
"id": "gOoYglWZq6ga"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Get Metadata & Numerical Data generated by the CNN Model"
],
"metadata": {
"id": "9f1bi8nPbQoZ"
}
},
{
"cell_type": "code",
"source": [
"### Get Metadata & Data ####################################################################################################\n",
"df_meta = pd.read_csv(os.path.join(wd, 'yaml', 'Metadata.csv'))\n",
"df_meta['ScObj Name'] = prefix + '_' + df_meta['image_id']\n",
"\n",
"display(df_meta.head())\n",
"\n",
"df_predic = pd.read_csv(os.path.join(wd, 'detection_summary.csv'))\n",
"df_predic['ScObj Name'] = prefix + '_' + df_predic['image_id']\n",
"\n",
"display(df_predic.head())\n",
"\n",
"df=pd.merge(df_meta, df_predic, on=['image_id', 'ScObj Name'])\n",
"display(df.head())"
],
"metadata": {
"id": "GrnJhp5ObTH5"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Get or Create Scientific Objects Name&URI\n",
"
\n",
"\n",
"
\n"
],
"metadata": {
"id": "Zu6w8c4sbbri"
}
},
{
"cell_type": "code",
"source": [
"df_meta.head()"
],
"metadata": {
"id": "0B-aFNCS4kmV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"### Get or Create Scientific Objects Name&URI ###############################################################################\n",
"Relations_Gen = []\n",
"\n",
"### ObjectRealtionDTO for Start Date ########################################################################################\n",
"StartExp = ExpInfo['Start Date']\n",
"if StartExp != None:\n",
" relation_temp = osC.RDFObjectRelationDTO(\n",
" _property=\"vocabulary:hasCreationDate\",\n",
" value=StartExp)\n",
" Relations_Gen.append(relation_temp)\n",
" del relation_temp, StartExp\n",
"\n",
"else:\n",
" print('Start Date Missing')\n",
"\n",
"EndExp = ExpInfo['End Date']\n",
"if EndExp != None:\n",
" EndExp = datetime.strptime(EndExp, \"%d/%m/%Y\")\n",
" relation_temp = osC.RDFObjectRelationDTO(\n",
" _property=\"vocabulary:hasDestructionDate\",\n",
" value=EndExp)\n",
" Relations_Gen.append(relation_temp)\n",
" del relation_temp, EndExp\n",
"else:\n",
" print('End Date Missing')\n",
"\n",
"### Scientific Object RDF Type ##############################################################################################\n",
"BioMat_Type = ExpInfo['RDF Type']\n",
"if not BioMat_Type:\n",
" sys.exit(\"Scientific Object RDF Type Missing\")\n",
"else:\n",
" for biomat in BioMat_Type:\n",
" Onto_Api = osC.OntologyApi(Py_Client)\n",
" Onto_Src = Onto_Api.search_sub_classes_of(\n",
" name=biomat, parent_type=\"vocabulary:ScientificObject\")[\"result\"]\n",
" if Onto_Src:\n",
" rdf_type = Onto_Src[0].children[0].uri\n",
" else:\n",
" sys.exit(\"Scientific Object RDF Type Unknown\")\n",
" del biomat, BioMat_Type, Onto_Api, Onto_Src\n",
"\n",
"### Get Scientific Objects Name&URI #########################################################################################\n",
"ScObj_Api = osC.ScientificObjectsApi(Py_Client)\n",
"ScObj_uri = {}\n",
"for index, row in tqdm(df_meta.iterrows(), desc=\"ScObj processing:\"):\n",
" ScObj_Src = ScObj_Api.search_scientific_objects(name=row[\"ScObj Name\"])[\"result\"]\n",
" if ScObj_Src:\n",
" ScObj_uri.update({row[\"ScObj Name\"]: ScObj_Src[0].uri})\n",
" else:\n",
" Relations_ScObj=[]\n",
"\n",
" # ObjectRealtionDTO for Germplasm ###################################################################################\n",
" if Species_uri:\n",
" relation_temp = osC.RDFObjectRelationDTO(\n",
" _property=\"vocabulary:hasGermplasm\",\n",
" value=Species_uri.get(row[\"Species\"]))\n",
" Relations_ScObj.append(relation_temp)\n",
" del relation_temp\n",
"\n",
" # ObjectRealtionDTO for Factors #####################################################################################\n",
" if Factors_Levels_uri:\n",
" Factors_temp = {\"Factors\": row[\"Plant Info\"].split(\"_\")}\n",
" for factor in Factors_temp[\"Factors\"]:\n",
" relation_temp = osC.RDFObjectRelationDTO(\n",
" _property=\"vocabulary:hasFactorLevel\",\n",
" value=Factors_Levels_uri.get(factor))\n",
" Relations_ScObj.append(relation_temp)\n",
" del relation_temp\n",
" Relations = Relations_Gen + Relations_ScObj\n",
"\n",
" # Creation of Scientific Object #####################################################################################\n",
" body = osC.ScientificObjectCreationDTO(name=row[\"ScObj Name\"],\n",
" rdf_type=rdf_type,\n",
" relations=Relations,\n",
" experiment=NameExp_uri[NameExp])\n",
" ScObj_Api.create_scientific_object(body, )\n",
" del Relations, Relations_ScObj\n",
"\n",
" # Get New Scientific Object Name&URI ################################################################################\n",
" ScObj_Src = ScObj_Api.search_scientific_objects(name=row[\"ScObj Name\"])[\"result\"]\n",
" ScObj_uri.update({row[\"ScObj Name\"]: ScObj_Src[0].uri})\n",
"print(\"Done\")\n",
"del index, row, ScObj_Src, ScObj_Api, Relations_Gen, rdf_type\n",
"\n",
"display(ScObj_uri)"
],
"metadata": {
"id": "0X_WNtPK-x7T"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Reference Provenance\n",
"
"
],
"metadata": {
"id": "EMsiCAecbgjw"
}
},
{
"cell_type": "markdown",
"source": [
"## Get or Create Original Images Provenance Name&URI"
],
"metadata": {
"id": "BHKQbMn9L-u9"
}
},
{
"cell_type": "code",
"source": [
"prov_dict={}\n",
"\n",
"### Get or Create Original Images Provenance ################################################################################\n",
"Dat_Api = osC.DataApi(Py_Client)\n",
"\n",
"prov = \"Canon-OriginalImages\"\n",
"\n",
"Prov_Src = Dat_Api.search_provenance(name=prov,)[\"result\"]\n",
"if Prov_Src:\n",
" prov_dict.update({prov: Prov_Src[0].uri})\n",
" print(\"{} URI: {}\".format(prov,Prov_Src[0].uri))\n",
"else:\n",
" description = \"Images acquired by a Canon DSLR Camera\"\n",
"\n",
" prov_activity = [osC.ActivityCreationDTO(rdf_type=\"vocabulary:ImageAcquisition\")]\n",
"\n",
" prov_agent = [\n",
"\n",
" osC.AgentModel(uri=\"http://phis.egi-demo.eu/id/device/field_camera\",\n",
" rdf_type=\"vocabulary:SensingDevice\",\n",
" settings={})]\n",
"\n",
" body = osC.ProvenanceCreationDTO(name=prov,\n",
" description=description,\n",
" prov_agent=prov_agent,\n",
" prov_activity=prov_activity)\n",
"\n",
" Api_Resp = Dat_Api.create_provenance(body=body, )\n",
" print(\"Provenance Created: {}\".format(str(Api_Resp[\"metadata\"][\"datafiles\"])))\n",
" Prov_Src = Dat_Api.search_provenance(name=prov,)[\"result\"]\n",
" prov_dict.update({prov: Prov_Src[0].uri})\n",
" print(\"{} URI Created: {}\".format(prov, Prov_Src[0].uri))\n",
" del description, prov_activity, prov_agent, body, Api_Resp\n",
"del prov, Dat_Api, Prov_Src"
],
"metadata": {
"id": "rzv31yk3NUAg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Get or Create BBoxed Images Provenance Name&URI"
],
"metadata": {
"id": "LotF4xOiMQH2"
}
},
{
"cell_type": "code",
"source": [
"### Get or Create Leaf_CNN BBox Images Provenance ###########################################################################\n",
"Dat_Api = osC.DataApi(Py_Client)\n",
"\n",
"prov = 'Leaf_CNN-BBoxImage'\n",
"\n",
"Prov_Src = Dat_Api.search_provenance(name=prov,)[\"result\"]\n",
"if Prov_Src:\n",
" prov_dict.update({prov: Prov_Src[0].uri})\n",
" print(\"{} URI: {}\".format(prov, Prov_Src[0].uri))\n",
"else:\n",
" description = \"Images annotated with Bounding Box predicted with a CNN\"\n",
"\n",
" prov_activity = [osC.ActivityCreationDTO(rdf_type=\"vocabulary:ImageAnalysis\")]\n",
"\n",
" prov_agent = [\n",
" osC.AgentModel(uri=\"http://phis.egi-demo.eu/id/device/leaf_cnn-v1\",\n",
" rdf_type=\"vocabulary:Software\"),\n",
"\n",
" osC.AgentModel(uri=\"http://phis.egi-demo.eu/id/device/field_camera\",\n",
" rdf_type=\"vocabulary:SensingDevice\",\n",
" settings={})]\n",
"\n",
" body = osC.ProvenanceCreationDTO(name=prov,\n",
" description=description,\n",
" prov_agent=prov_agent,\n",
" prov_activity=prov_activity)\n",
"\n",
" Api_Resp = Dat_Api.create_provenance(body=body, )\n",
" print(\"Provenance Created: {}\".format(str(Api_Resp[\"metadata\"][\"datafiles\"])))\n",
" Prov_Src = Dat_Api.search_provenance(name=prov,)[\"result\"]\n",
" prov_dict.update({prov: Prov_Src[0].uri})\n",
" print(\"{} URI Created: {}\".format(prov, Prov_Src[0].uri))\n",
" del description, prov_activity, prov_agent, body, Api_Resp\n",
"del prov, Dat_Api, Prov_Src"
],
"metadata": {
"id": "R9qb2-e1PHNY"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Get or Create Data Provenance Name&URI"
],
"metadata": {
"id": "fsc5fw4BMS8Y"
}
},
{
"cell_type": "code",
"source": [
"### Get or Create Leaf_CNN Data Provenance ##################################################################################\n",
"Dat_Api = osC.DataApi(Py_Client)\n",
"\n",
"prov = 'Leaf_CNN-PredictedData'\n",
"\n",
"Prov_Src = Dat_Api.search_provenance(name=prov,)[\"result\"]\n",
"if Prov_Src:\n",
" prov_dict.update({prov: Prov_Src[0].uri})\n",
" print(\"{} URI: {}\".format(prov, Prov_Src[0].uri))\n",
"else:\n",
" description = \"Data generated by CNN including Leaf Count, Coverage Height and Coverage Width\"\n",
" prov_activity = [osC.ActivityCreationDTO(rdf_type=\"vocabulary:ImageAnalysis\")]\n",
" prov_agent = [\n",
"\n",
" osC.AgentModel(uri=\"http://phis.egi-demo.eu/id/device/leaf_cnn-v1\",\n",
" rdf_type=\"vocabulary:Software\")\n",
" ]\n",
"\n",
" body = osC.ProvenanceCreationDTO(name=prov,\n",
" description=description,\n",
" prov_agent=prov_agent,\n",
" prov_activity=prov_activity)\n",
"\n",
" Api_Resp = Dat_Api.create_provenance(body=body, )\n",
" print(\"Provenance Created: {}\".format(str(Api_Resp[\"metadata\"][\"datafiles\"])))\n",
" Prov_Src = Dat_Api.search_provenance(name=prov,)[\"result\"]\n",
" prov_dict.update({prov: Prov_Src[0].uri})\n",
" print(\"{} URI Created: {}\".format(prov, Prov_Src[0].uri))\n",
" del description, prov_activity, prov_agent, body, Api_Resp\n",
"del prov, Dat_Api, Prov_Src"
],
"metadata": {
"id": "T4KTD7ZjbigI"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Import datafiles (Images)\n",
"This section handles the import of all image data files."
],
"metadata": {
"id": "gwf71udLVJ6Q"
}
},
{
"cell_type": "markdown",
"source": [
"## List all images"
],
"metadata": {
"id": "JC2jlYpCVXHJ"
}
},
{
"cell_type": "code",
"source": [
"### List Original Images ####################################################################################################\n",
"\n",
"wd_img = os.path.join(wd, 'Leaf','test', 'leaf')\n",
"\n",
"ls_img = []\n",
"for (root, dirs, files) in os.walk(wd_img):\n",
" for filename in files:\n",
" if filename.endswith(\".jpg\"):\n",
" ls_img.append(os.path.join(root, filename))\n",
"\n",
"print(f'Numbers of Original Img: {len(ls_img)}')\n",
"\n",
"### List BBoxed Images ######################################################################################################\n",
"wd_bbox = os.path.join(wd, 'runs','detect', 'predict')\n",
"\n",
"ls_bbox = []\n",
"for (root, dirs, files) in os.walk(wd_bbox):\n",
" for filename in files:\n",
" if filename.endswith(\".jpg\"):\n",
" ls_bbox.append(os.path.join(root, filename))\n",
"\n",
"print(f'Numbers of BBoxed Img: {len(ls_bbox)}')"
],
"metadata": {
"id": "XgOz6HW0VJDs"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Get Original Images Metadata\n",
"
"
],
"metadata": {
"id": "WbsbM_QT1TWN"
}
},
{
"cell_type": "code",
"source": [
"prov='Canon-OriginalImages'\n",
"### Initialize an empty list to store all the dictionaries ##################################################################\n",
"Img_Name_All = []\n",
"### Loop through each file in ls_fec ########################################################################################\n",
"for img in ls_img:\n",
"\n",
" # Extract the base filename and remove the \".jpg\" extension #############################################################\n",
" target = os.path.basename(img).replace(\".jpg\", \"\")\n",
"\n",
" # Create the dictionary with common keys ################################################################################\n",
" Img_Dict = {\n",
" \"Path\": img,\n",
" \"Target\": prefix + '_' + target,\n",
" 'Prov': prov_dict[prov],\n",
" }\n",
"\n",
" # Append the dictionary to the list #####################################################################################\n",
" Img_Name_All.append(Img_Dict)\n",
"\n",
"### Display the first dictionary in the list ################################################################################\n",
"display(Img_Name_All[0])\n",
"print(len(Img_Name_All))"
],
"metadata": {
"id": "8TQtc48GWyDP"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Check if any Original Images already exist"
],
"metadata": {
"id": "2mmRA-k8YssW"
}
},
{
"cell_type": "code",
"source": [
"prov='Canon-OriginalImages'\n",
"\n",
"### Filter out existing Original images #####################################################################################\n",
"Dat_Api = osC.DataApi(Py_Client)\n",
"Dat_Src = Dat_Api.get_data_file_descriptions_by_search(provenances=[prov_dict[prov]], experiments=[NameExp_uri[NameExp]], page_size=100000)[\"result\"]\n",
"\n",
"Img_Name_Com=[]\n",
"for i in Img_Name_All:\n",
" for elts in Dat_Src:\n",
" if ScObj_uri[i[\"Target\"]] == elts.target:\n",
" Img_Name_Com.append(i)\n",
" del elts\n",
"del i\n",
"print(\"{} images over {} already exit on the database\".format(len(Img_Name_Com), len(Img_Name_All)))\n",
"\n",
"### Exclude existing one from the Dict ######################################################################################\n",
"Img_Name=[]\n",
"for i in Img_Name_All:\n",
" if i not in Img_Name_Com:\n",
" Img_Name.append(i)\n",
"del i, Img_Name_All, Img_Name_Com, Dat_Src"
],
"metadata": {
"id": "p7vuKLtiYksk"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Import Original Images\n",
"
"
],
"metadata": {
"id": "dRNb3rrxZ37T"
}
},
{
"cell_type": "code",
"source": [
"### Import Original Images ##################################################################################################\n",
"Py_Client.connect_to_opensilex_ws(identifier=login[\"Identifier\"],\n",
"                                  password=login[\"Password\"],\n",
"                                  host=login[\"Host\"])\n",
"\n",
"# Session tokens expire; re-authenticate every 30 minutes during the upload.\n",
"timelimit = datetime.datetime.now() + datetime.timedelta(minutes=30)\n",
"\n",
"Dat_Api = osC.DataApi(Py_Client)\n",
"for img in tqdm(Img_Name):\n",
"    description = {\"rdf_type\": \"vocabulary:RGBImage\",  # a DTO object was originally proposed here\n",
"                   \"date\": str(ExpInfo['Start Date']),\n",
"                   \"target\": ScObj_uri[img[\"Target\"]],\n",
"                   \"provenance\": {\"uri\": img[\"Prov\"],\n",
"                                  \"experiments\": [NameExp_uri[NameExp]]\n",
"                                 }\n",
"                  }\n",
"    Dat_Api.post_data_file(description=json.dumps(description), file=img[\"Path\"])  # DTO object replaced by json.dumps()\n",
"\n",
"    # BUG FIX: this re-connection guard was previously OUTSIDE the for loop, so the\n",
"    # session token was never refreshed while the upload was in progress; it must\n",
"    # run on every iteration (as the numeric-data import cell already does).\n",
"    if datetime.datetime.now() > timelimit:\n",
"        Py_Client.connect_to_opensilex_ws(identifier=login[\"Identifier\"],\n",
"                                          password=login[\"Password\"],\n",
"                                          host=login[\"Host\"])\n",
"        Dat_Api = osC.DataApi(Py_Client)\n",
"        timelimit = datetime.datetime.now() + datetime.timedelta(minutes=30)\n",
"\n",
"print('Done')"
],
"metadata": {
"id": "m4PKzUsAa_IE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Get Original datafile URI"
],
"metadata": {
"id": "TdIE6FoKveP2"
}
},
{
"cell_type": "code",
"source": [
"prov='Canon-OriginalImages'\n",
"\n",
"### Recover Original URI ####################################################################################################\n",
"Dat_Api = osC.DataApi(Py_Client)\n",
"Dat_Src = Dat_Api.get_data_file_descriptions_by_search(provenances=[prov_dict[prov]], experiments=[NameExp_uri[NameExp]], page_size=100000)[\"result\"]\n",
"\n",
"# Reverse lookup (URI -> scientific-object name) instead of an O(n) scan per file.\n",
"# BUG FIX: the previous loop silently reused the last matched `target` when a\n",
"# file's target was absent from ScObj_uri; unmatched targets are now skipped.\n",
"uri_to_name = {v: k for k, v in ScObj_uri.items()}\n",
"\n",
"Img_uri=[]\n",
"for elts in Dat_Src:\n",
"    target = uri_to_name.get(elts.target)\n",
"    if target is None:\n",
"        continue\n",
"    Img_uri.append({'Type': 'Original',\n",
"                    \"Target URI\": ScObj_uri[target],\n",
"                    \"Target Name\": target,\n",
"                    \"uri\": elts.uri})\n",
"del elts, Dat_Src, Dat_Api, uri_to_name\n",
"\n",
"display(Img_uri[0])"
],
"metadata": {
"id": "SNlk_VNslBpB"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Get Bounding Boxed Images Metadata\n",
"\n",
"\n",
"\n"
],
"metadata": {
"id": "BynLCeAY1MM5"
}
},
{
"cell_type": "code",
"source": [
"prov='Leaf_CNN-BBoxImage'\n",
"\n",
"### Build one metadata dict per bounding-boxed image file ###################################################################\n",
"# Target name = prefix + base filename without the \".jpg\" extension.\n",
"BBox_Name_All = [\n",
"    {\"Path\": bbox,\n",
"     \"Target\": prefix + '_' + os.path.basename(bbox).replace(\".jpg\", \"\"),\n",
"     'Prov': prov_dict[prov]}\n",
"    for bbox in ls_bbox\n",
"]\n",
"\n",
"### Attach the URI of the original image each BBox was derived from #########################################################\n",
"# Dict construction keeps last-match-wins semantics, same as the original loop.\n",
"original_by_target = {item[\"Target Name\"]: item[\"uri\"] for item in Img_uri}\n",
"for elts in BBox_Name_All:\n",
"    if elts[\"Target\"] in original_by_target:\n",
"        elts.update({\"Prov_Used\": original_by_target[elts[\"Target\"]]})\n",
"\n",
"### Display the first dictionary in the list ################################################################################\n",
"display(BBox_Name_All[0])\n",
"print(len(BBox_Name_All))"
],
"metadata": {
"id": "bSPMue760hyx"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Check if any Bounding Boxed Images already exist"
],
"metadata": {
"id": "iH__2U41j38e"
}
},
{
"cell_type": "code",
"source": [
"prov='Leaf_CNN-BBoxImage'\n",
"\n",
"### Filter out existing BBoxed Images #######################################################################################\n",
"Dat_Api = osC.DataApi(Py_Client)\n",
"Dat_Src = Dat_Api.get_data_file_descriptions_by_search(provenances=[prov_dict[prov]], experiments=[NameExp_uri[NameExp]], page_size=100000)[\"result\"]\n",
"\n",
"# Set of targets already present on the server: O(1) membership tests instead of\n",
"# the previous O(n*m) nested scan, which also appended the same image once per\n",
"# matching server file and inflated the printed count.\n",
"existing_targets = {elts.target for elts in Dat_Src}\n",
"\n",
"BBox_Name_Com = [i for i in BBox_Name_All if ScObj_uri[i[\"Target\"]] in existing_targets]\n",
"print(\"{} images over {} already exist on the database\".format(len(BBox_Name_Com), len(BBox_Name_All)))\n",
"\n",
"### Exclude existing one from the Dict ######################################################################################\n",
"BBox_Name = [i for i in BBox_Name_All if i not in BBox_Name_Com]\n",
"del BBox_Name_All, BBox_Name_Com, Dat_Src, existing_targets"
],
"metadata": {
"id": "tn8uUCAD8LAT"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Import Bounding Boxed Images\n",
"\n"
],
"metadata": {
"id": "bjHc0w31jqLZ"
}
},
{
"cell_type": "code",
"source": [
"### Import BBoxed Images ####################################################################################################\n",
"Py_Client.connect_to_opensilex_ws(identifier=login[\"Identifier\"],\n",
"                                  password=login[\"Password\"],\n",
"                                  host=login[\"Host\"])\n",
"\n",
"# Session tokens expire; re-authenticate every 30 minutes during the upload.\n",
"timelimit = datetime.datetime.now() + datetime.timedelta(minutes=30)\n",
"\n",
"Dat_Api = osC.DataApi(Py_Client)\n",
"for bbox in tqdm(BBox_Name):\n",
"    # NOTE(review): bbox[\"Prov_Used\"] raises KeyError when no original image was\n",
"    # matched in the metadata cell - confirm every BBox has an original on PHIS.\n",
"    description = {\"rdf_type\": \"vocabulary:RGBImage\",  # a DTO object was originally proposed here\n",
"                   \"date\": str(ExpInfo['Start Date']),\n",
"                   \"target\": ScObj_uri[bbox[\"Target\"]],\n",
"                   \"provenance\": {\"uri\": bbox[\"Prov\"],\n",
"                                  \"prov_used\": [{\"uri\": bbox[\"Prov_Used\"],\n",
"                                                 \"rdf_type\": \"vocabulary:RGBImage\"}],\n",
"                                  \"experiments\": [NameExp_uri[NameExp]]\n",
"                                 }\n",
"                  }\n",
"    Dat_Api.post_data_file(description=json.dumps(description), file=bbox[\"Path\"])  # DTO object replaced by json.dumps()\n",
"\n",
"    # BUG FIX: this re-connection guard was previously OUTSIDE the for loop, so the\n",
"    # session token was never refreshed while the upload was in progress; it must\n",
"    # run on every iteration (as the numeric-data import cell already does).\n",
"    if datetime.datetime.now() > timelimit:\n",
"        Py_Client.connect_to_opensilex_ws(identifier=login[\"Identifier\"],\n",
"                                          password=login[\"Password\"],\n",
"                                          host=login[\"Host\"])\n",
"        Dat_Api = osC.DataApi(Py_Client)\n",
"        timelimit = datetime.datetime.now() + datetime.timedelta(minutes=30)\n",
"\n",
"print('Done')"
],
"metadata": {
"id": "J876zzHq89qj"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Get Bounding Box datafile URI"
],
"metadata": {
"id": "oyIxHqgTkFqY"
}
},
{
"cell_type": "code",
"source": [
"prov='Leaf_CNN-BBoxImage'\n",
"\n",
"### Recover BBoxed URI ######################################################################################################\n",
"Dat_Api = osC.DataApi(Py_Client)\n",
"Dat_Src = Dat_Api.get_data_file_descriptions_by_search(provenances=[prov_dict[prov]], experiments=[NameExp_uri[NameExp]], page_size=100000)[\"result\"]\n",
"\n",
"# Reverse lookup (URI -> scientific-object name) instead of an O(n) scan per file.\n",
"# BUG FIX: the previous loop silently reused the last matched `target` when a\n",
"# file's target was absent from ScObj_uri; unmatched targets are now skipped.\n",
"uri_to_name = {v: k for k, v in ScObj_uri.items()}\n",
"\n",
"BBox_uri=[]\n",
"for elts in Dat_Src:\n",
"    target = uri_to_name.get(elts.target)\n",
"    if target is None:\n",
"        continue\n",
"    BBox_uri.append({'Type': 'BBoxed',\n",
"                     \"Target URI\": ScObj_uri[target],\n",
"                     \"Target Name\": target,\n",
"                     \"uri\": elts.uri})\n",
"del elts, Dat_Src, Dat_Api, uri_to_name\n",
"\n",
"display(BBox_uri[0])"
],
"metadata": {
"id": "FO1o6KcP9S-p"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Import Numerical Data\n",
"\n",
"\n"
],
"metadata": {
"id": "19mwYrnyA0Tn"
}
},
{
"cell_type": "markdown",
"source": [
"## List Variable <-> Column Name Association \n",
"This section establishes the mapping between observed variables and their corresponding column names in the dataset."
],
"metadata": {
"id": "vn_XT92fkOAi"
}
},
{
"cell_type": "code",
"source": [
"### Load the variable <-> column-name mapping from the YAML configuration ###################################################\n",
"variables_path = os.path.join(wd, 'yaml', 'Variables.yaml')\n",
"with open(variables_path, 'r') as stream:\n",
"    Variables = yaml.safe_load(stream)\n",
"\n",
"display(Variables)"
],
"metadata": {
"id": "e88N8NXIA0GX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## PHIS data import\n",
"This section handles the import of numerical data from the dataset."
],
"metadata": {
"id": "ZazHByRSkplw"
}
},
{
"cell_type": "code",
"source": [
"# Upload per-target numeric predictions: one OpenSILEX variable per column of `df`,\n",
"# batched in slices, skipping (target, variable) pairs that already have data.\n",
"# NOTE(review): relies on `df`, `ScObj_uri`, `prov_dict`, `NameExp_uri`, `ExpInfo`,\n",
"# `BBox_uri`, `Variables` and `login` created by earlier cells - confirm on a fresh run.\n",
"prov = 'Leaf_CNN-PredictedData'\n",
"\n",
"### Import Numerical Data ###################################################################################################\n",
"Py_Client.connect_to_opensilex_ws(identifier=login[\"Identifier\"],\n",
"                                  password=login[\"Password\"],\n",
"                                  host=login[\"Host\"])\n",
"# Session tokens expire; the guard inside the slice loop re-authenticates every 30 min.\n",
"timelimit = datetime.datetime.now()+datetime.timedelta(minutes=30)\n",
"\n",
"Dat_Api = osC.DataApi(Py_Client)\n",
"Var_Api = osC.VariablesApi(Py_Client)\n",
"\n",
"# Software agent recorded in each datum's provenance (prov_was_associated_with).\n",
"Prov_Was_Associated_With=osC.ProvEntityModel(uri=\"phis-egi-demo:id/device/leaf_cnn-v1\",\n",
"                                             rdf_type=\"vocabulary:Software\")\n",
"\n",
"# logfile maps variable name -> rows skipped because data already existed on the server.\n",
"logfile={}\n",
"for key, value in tqdm(Variables.items()):\n",
"    logfile[value] = []\n",
"    # Resolve the variable URI by name; assumes the first search hit is the right one.\n",
"    Var_Src = Var_Api.search_variables(name=value)[\"result\"]\n",
"    # `pas` = rows per POST request; `count` tracks the slice number (diagnostic only).\n",
"    pas=1000\n",
"    count=0\n",
"    for slc in range(0, len(df), pas):\n",
"        df_Slice = df.iloc[slc : slc + pas]\n",
"        bodies=[]\n",
"        count=count+1\n",
"        for index, row in df_Slice.iterrows():\n",
"            # Skip rows whose (target, variable) pair already has data in this experiment.\n",
"            Dat_Src=Dat_Api.search_data_list(targets = [ScObj_uri[row[\"ScObj Name\"]]],\n",
"                                             variables = [Var_Src[0].uri],\n",
"                                             experiments=[NameExp_uri[NameExp]], page_size=20)['result']\n",
"            if Dat_Src:\n",
"                logfile[value].append({'ScObj Name': {row[\"ScObj Name\"]}})\n",
"\n",
"            else:\n",
"                # Link the datum back to the bounding-boxed image it was predicted from\n",
"                # (last match wins if several BBox entries share the target name).\n",
"                # NOTE(review): if no BBox matches, Prov_Used stays None and [None] is\n",
"                # sent as prov_used - confirm the web service tolerates a null entry.\n",
"                Prov_Used=None\n",
"                for item in BBox_uri:\n",
"                    if item[\"Target Name\"]==row[\"ScObj Name\"]:\n",
"                        Prov_Used=osC.ProvEntityModel(uri=item[\"uri\"], rdf_type=\"vocabulary:RGBImage\")\n",
"                # All data points carry the experiment start date, not a per-row date.\n",
"                body = osC.DataCreationDTO(_date = str(ExpInfo['Start Date']),\n",
"                                           target = ScObj_uri[row[\"ScObj Name\"]],\n",
"                                           variable = Var_Src[0].uri,\n",
"                                           value = row[key],\n",
"                                           provenance = osC.DataProvenanceModel(\n",
"                                               uri = prov_dict[prov],\n",
"                                               prov_used = [Prov_Used],\n",
"                                               prov_was_associated_with = [Prov_Was_Associated_With],\n",
"                                               experiments = [NameExp_uri[NameExp]]))\n",
"                bodies.append(body)\n",
"        # Refresh the session token mid-run so long imports do not hit an expired token.\n",
"        if datetime.datetime.now() > timelimit:\n",
"            Py_Client.connect_to_opensilex_ws(identifier=login[\"Identifier\"],\n",
"                                              password=login[\"Password\"],\n",
"                                              host=login[\"Host\"])\n",
"            Dat_Api = osC.DataApi(Py_Client)\n",
"            timelimit = datetime.datetime.now()+datetime.timedelta(minutes=30)\n",
"        # POST the whole slice at once; empty slices mean everything was already uploaded.\n",
"        if bodies:\n",
"            #print(bodies)\n",
"            Dat_Api.add_list_data(body=bodies,)\n",
"        else:\n",
"            print(f'all data of {value} already uploaded')\n",
"print('Import Over')"
],
"metadata": {
"id": "tg0-GiBR6Xmw"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
""
],
"metadata": {
"id": "bMFzHtiFzqTV"
}
}
]
}