# Source code for PTLF.lab

"""
Create or use your lab.
"""
from pathlib import Path
import os
import json
from datetime import datetime
from typing import Optional
import pandas as pd

from .context import set_shared_data, get_caller, register_libs_path, get_shared_data
from .utils import Db

__all__ = ["lab_setup", "create_project", "get_logs"]
 
def export_settigns():
    """
    Dump the settings returned by ``get_shared_data()`` to a JSON file.

    The file is named ``<project_name>.json`` and is written into the parent
    directory of ``data_path`` (not into ``data_path`` itself).

    Returns
    -------
    str
        Path of the JSON file that was written.
    """
    shared = get_shared_data()
    # The export is placed next to the data directory, i.e. in its parent.
    parent_dir = Path(shared['data_path']).parent
    file_name = shared["project_name"] + ".json"
    export_path = os.path.join(parent_dir, file_name)
    with open(export_path, "w", encoding="utf-8") as handle:
        json.dump(shared, handle, indent=4)
    return export_path

def create_project(settings: dict) -> str:
    """
    Build a fresh project on disk from ``settings``.

    Creates the data and component directories and the artifact folders
    (directly under the data directory and again under ``Archived`` and
    ``Transfer``), deletes an existing ``logs.db`` / ``ppls.db`` in the data
    directory, initialises the databases through ``setup_databases``, hands
    the settings to ``set_shared_data`` and finally writes them to
    ``<data_path>/<project_name>.json``.

    Parameters
    ----------
    settings : dict
        Must contain ``project_dir``, ``project_name`` and ``component_dir``.
        The dict is updated in place: both directories are made absolute and
        ``data_path`` / ``setting_path`` are added.

    Returns
    -------
    str
        Absolute path of the settings JSON file that was written.
    """
    root_dir = os.path.abspath(settings["project_dir"])
    name = settings["project_name"]
    comp_dir = os.path.abspath(settings["component_dir"])

    # Everything owned by the project lives under <project_dir>/<project_name>.
    data_path = os.path.join(root_dir, name)
    setting_path = os.path.join(data_path, f"{name}.json")

    # Write the resolved absolute paths back into the caller's dict.
    settings.update(
        project_dir=root_dir,
        component_dir=comp_dir,
        data_path=data_path,
        setting_path=setting_path,
    )

    os.makedirs(data_path, exist_ok=True)
    os.makedirs(comp_dir, exist_ok=True)

    # Artifact folders exist at the top level and inside each grouping folder.
    artifact_names = ("Configs", "Quicks", "Histories", "Weights")
    folders = [os.path.join(data_path, leaf) for leaf in artifact_names]
    folders += [
        os.path.join(data_path, group, leaf)
        for group in ("Archived", "Transfer")
        for leaf in artifact_names
    ]
    for folder in folders:
        os.makedirs(folder, exist_ok=True)

    # Only the two databases directly inside data_path are removed here;
    # Archived/ppls.db is left in place if it already exists.
    for db_name in ("logs.db", "ppls.db"):
        stale_db = os.path.join(data_path, db_name)
        if os.path.exists(stale_db):
            os.remove(stale_db)

    setup_databases(settings)
    set_shared_data(settings)

    with open(setting_path, "w", encoding="utf-8") as out_file:
        json.dump(settings, out_file, indent=4)
    return setting_path
def setup_databases(settings: dict):
    """
    Set up the databases required by a lab project.

    Three database files are created below ``settings["data_path"]``:

    - ``logs.db`` with a ``logs`` table, seeded with a first row ``log0``
      whose ``called_at`` is the value returned by ``get_caller()``
    - ``ppls.db`` with ``ppls``, ``edges`` and ``runnings`` tables
    - ``Archived/ppls.db`` with a ``ppls`` table (same definition as above)

    Tables are created with ``CREATE TABLE IF NOT EXISTS``; the ``log0``
    insert is a plain ``INSERT``.

    Parameters
    ----------
    settings : dict
        Must contain ``data_path``. The ``Archived`` sub-directory is not
        created here.
    """
    # Kept as function-level imports, as in the original implementation.
    from .utils import Db
    from .context import get_caller

    def create_and_init_db(db_path: str, tables: list,
                           init_statements: Optional[list] = None):
        """Open ``db_path``, run the DDL, then the optional seed statements."""
        db = Db(db_path=db_path)
        try:
            for table_sql in tables:
                db.execute(table_sql)
            for stmt, params in init_statements or ():
                db.execute(stmt, params)
        finally:
            # Release the handle even when one of the statements fails.
            db.close()

    data_path = settings["data_path"]

    # The same ppls definition is used for the live and the archived database.
    ppls_table = """
        CREATE TABLE IF NOT EXISTS ppls (
            pplid TEXT PRIMARY KEY,
            args_hash TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'init'
                CHECK(status IN ('init', 'running', 'frozen', 'cleaned')),
            created_time TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
        );
    """

    # ---- logs.db ----
    logs_table = """
        CREATE TABLE IF NOT EXISTS logs (
            logid TEXT PRIMARY KEY,
            called_at TEXT NOT NULL,
            created_time TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
        );
    """
    # NOTE(review): get_caller() presumably inspects the call stack, so it is
    # deliberately called from this frame rather than from the helper — confirm.
    log_init = [
        ("INSERT INTO logs (logid, called_at) VALUES (?, ?)", ('log0', get_caller())),
    ]
    create_and_init_db(os.path.join(data_path, "logs.db"), [logs_table], log_init)

    # ---- ppls.db ----
    edges_table = """
        CREATE TABLE IF NOT EXISTS edges (
            edgid INTEGER PRIMARY KEY AUTOINCREMENT,
            prev TEXT NOT NULL,
            next TEXT NOT NULL,
            desc TEXT,
            directed BOOL DEFAULT TRUE,
            FOREIGN KEY(prev) REFERENCES ppls(pplid),
            FOREIGN KEY(next) REFERENCES ppls(pplid)
        );
    """
    # NOTE(review): runnings.pplid is declared without a column type, unlike
    # ppls.pplid (TEXT) — left unchanged; confirm this is intended.
    runnings_table = """
        CREATE TABLE IF NOT EXISTS runnings (
            runid INTEGER PRIMARY KEY AUTOINCREMENT,
            pplid NOT NULL,
            logid TEXT DEFAULT NULL,
            parity TEXT DEFAULT NULL,
            started_time TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY(pplid) REFERENCES ppls(pplid)
        );
    """
    create_and_init_db(
        os.path.join(data_path, "ppls.db"),
        [ppls_table, edges_table, runnings_table],
    )

    # ---- Archived/ppls.db ----
    create_and_init_db(os.path.join(data_path, "Archived", "ppls.db"), [ppls_table])
def lab_setup(settings_path: Optional[str]) -> None:
    """
    Initialize the lab environment using settings from a JSON file.

    Loads the settings, records this call as a new row in the ``logs`` table
    of ``<data_path>/logs.db``, passes the settings together with the new log
    id to ``set_shared_data`` and registers ``component_dir`` through
    ``register_libs_path``.

    Parameters
    ----------
    settings_path : str
        Path to the JSON settings file. The file must exist and the loaded
        settings must contain ``data_path`` and ``component_dir``.

    Raises
    ------
    ValueError
        If `settings_path` is empty/``None`` or the file does not exist.
    """
    if not settings_path or not os.path.exists(settings_path):
        raise ValueError("Provide either settings_path or settings for lab setup")

    with open(settings_path, encoding="utf-8") as sp:
        settings = json.load(sp)

    # NOTE(review): get_caller() presumably inspects the call stack, so it is
    # kept as a direct call from this function — confirm before moving it.
    caller = get_caller()

    log_path = os.path.join(settings["data_path"], "logs.db")
    db = Db(db_path=log_path)
    try:
        # The new id is derived from the current row count, so ids stay unique
        # only as long as rows are never removed from the logs table.
        cursor = db.execute("SELECT COUNT(*) FROM logs")
        row_count = cursor.fetchone()[0]
        logid = f"log{row_count}"

        db.execute(
            "INSERT INTO logs (logid, called_at) VALUES (?, ?)",
            (logid, caller),
        )
    finally:
        # Release the handle even when the count or the insert fails.
        db.close()

    set_shared_data(settings, logid)
    register_libs_path(settings["component_dir"])
def get_logs() -> pd.DataFrame:
    """
    Retrieve all log entries from the logs database.

    Reads ``data_path`` from the settings returned by ``get_shared_data()``,
    opens ``<data_path>/logs.db``, selects every row of the ``logs`` table and
    returns the result as a pandas DataFrame.

    Returns:
        pd.DataFrame: One row per entry of the ``logs`` table; column names
        are taken from the query's cursor description.

    Raises:
        KeyError: If 'data_path' is not present in the settings.
        Exception: If there is an error accessing or querying the database.
    """
    settings = get_shared_data()
    log_path = os.path.join(settings["data_path"], "logs.db")

    db = Db(db_path=log_path)
    try:
        cursor = db.execute("SELECT * FROM logs")
        rows = cursor.fetchall()
        # The first item of each description entry is the column name.
        col_names = [column[0] for column in cursor.description]
    finally:
        # Release the handle even when the query or fetch fails.
        db.close()

    return pd.DataFrame(rows, columns=col_names)