diff --git a/abfe_tutorial/abfe_analysis.ipynb b/abfe_tutorial/abfe_analysis.ipynb index 9d5738a..9ba2ee1 100644 --- a/abfe_tutorial/abfe_analysis.ipynb +++ b/abfe_tutorial/abfe_analysis.ipynb @@ -33,32 +33,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2025-10-21 11:53:17-- https://zenodo.org/records/17348229/files/abfe_results.zip\n", - "Resolving zenodo.org (zenodo.org)... 188.185.45.92, 188.185.43.25, 188.185.48.194, ...\n", - "Connecting to zenodo.org (zenodo.org)|188.185.45.92|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 1005319 (982K) [application/octet-stream]\n", - "Saving to: ‘abfe_results.zip’\n", - "\n", - "abfe_results.zip 100%[===================>] 981.76K 718KB/s in 1.4s \n", - "\n", - "2025-10-21 11:53:20 (718 KB/s) - ‘abfe_results.zip’ saved [1005319/1005319]\n", - "\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 3092k 100 3092k 0 0 897k 0 0:00:03 0:00:03 --:--:-- 897k\n", "Archive: abfe_results.zip\n", - " creating: abfe_results/\n", - " creating: abfe_results/results_2/\n", - " inflating: abfe_results/toluene_results.json \n", - " creating: abfe_results/results_1/\n", - " creating: abfe_results/results_0/\n", - " inflating: abfe_results/results_2/1.json \n", - " inflating: abfe_results/results_1/1.json \n", - " inflating: abfe_results/results_0/1.json \n" + " inflating: abfe_results/README.md \n", + " inflating: abfe_results/abfe_results_single_unit/results_2/1.json \n", + " inflating: abfe_results/abfe_results_single_unit/toluene_results.json \n", + " inflating: abfe_results/abfe_results_single_unit/results_1/1.json \n", + " inflating: abfe_results/abfe_results_single_unit/results_0/1.json \n", + " inflating: abfe_results/abfe_results_multiple_units/results_2/ligand1_transformation.json \n", + " inflating: abfe_results/abfe_results_multiple_units/results_1/ligand1_transformation.json \n", + " inflating: abfe_results/abfe_results_multiple_units/results_0/ligand1_transformation.json \n" ] } ], "source": [ - "!wget https://zenodo.org/records/17348229/files/abfe_results.zip\n", - "!unzip abfe_results.zip" + "!curl -O https://zenodo.org/records/19498687/files/abfe_results.zip\n", + "!unzip -o abfe_results.zip" ] }, { @@ -108,6 +100,38 @@ { "cell_type": "code", "execution_count": 3, + "id": "08afcbcf-34a8-4450-a967-eb4ed5800ee1", + "metadata": {}, + "outputs": [], + "source": [ + "def _get_name(result: dict) -> str:\n", + " \"\"\"Get the ligand name from a unit's results data.\n", + "\n", + " Parameters\n", + " ----------\n", + " result : dict\n", + " A results dict.\n", + "\n", + " Returns\n", + " -------\n", + " str\n", + " Ligand name corresponding to the results.\n", + " \"\"\"\n", + "\n", + " solvent_data = list(result[\"protocol_result\"][\"data\"][\"solvent\"].values())[0][0]\n", + " try:\n", + " name = solvent_data[\"inputs\"][\"setup_results\"][\"inputs\"][\"alchemical_components\"][\"stateA\"][\n", + " 0\n", + " ][\"molprops\"][\"ofe-name\"]\n", + " except KeyError:\n", + " name = solvent_data[\"inputs\"][\"alchemical_components\"][\"stateA\"][0][\"molprops\"][\"ofe-name\"]\n", + "\n", + " return str(name)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "04ccdedd-84e3-4ccc-ae42-4f3772acb115", "metadata": {}, "outputs": [], @@ -153,14 +177,16 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "3733c540-de62-45a0-ba66-268397a38b49", "metadata": {}, "outputs": [], "source": [ + "\n", + "\n", "def _get_legs_from_result_jsons(\n", - " result_fns: list[pathlib.Path]\n", - ") -> dict[tuple[str, str], dict[str, list]]:\n", + " result_fns: list[pathlib.Path],\n", + ") -> dict[str, dict[str, list]]:\n", " \"\"\"\n", " Iterate over a list of result JSONs and populate a dict of dicts with all data needed\n", " for results processing.\n", @@ -175,8 +201,8 @@ "\n", " Returns\n", " -------\n", - " legs: dict[str,,dict[str, list]]\n", - " Data extracted from the given result JSONs, organized by the ligand name and simulation type.\n", + " legs: dict[str, dict[str, list]]\n", + " Data extracted from the given result JSONs, organized by the leg's ligand name and simulation type.\n", " \"\"\"\n", " from collections import defaultdict\n", "\n", @@ -187,66 +213,42 @@ " if name is None: # this means it couldn't find name and/or simtype\n", " continue\n", "\n", - " dgs[name]['overall'].append([result[\"estimate\"], result[\"uncertainty\"]])\n", - " proto_key = [\n", - " k\n", - " for k in result[\"unit_results\"].keys()\n", - " if k.startswith(\"ProtocolUnitResult\") \n", - " ]\n", + " dgs[name][\"overall\"].append([result[\"estimate\"], result[\"uncertainty\"]])\n", + " proto_key = [k for k in result[\"unit_results\"].keys() if k.startswith(\"ProtocolUnitResult\")]\n", " for p in proto_key:\n", + " # In openfe v1.9+, we only want to pick up results from\n", + " # the Analysis Unit. To ensure backwards compatibility with\n", + " # prior releases of openfe v1.x, we exclude Setup and Simulation\n", + " if (\n", + " \"Setup\" in result[\"unit_results\"][p][\"source_key\"]\n", + " or \"Simulation\" in result[\"unit_results\"][p][\"source_key\"]\n", + " ):\n", + " continue\n", " if \"unit_estimate\" in result[\"unit_results\"][p][\"outputs\"]:\n", " simtype = result[\"unit_results\"][p][\"outputs\"][\"simtype\"]\n", " dg = result[\"unit_results\"][p][\"outputs\"][\"unit_estimate\"]\n", " dg_error = result[\"unit_results\"][p][\"outputs\"][\"unit_estimate_error\"]\n", - " \n", + "\n", " dgs[name][simtype].append([dg, dg_error])\n", " if \"standard_state_correction\" in result[\"unit_results\"][p][\"outputs\"]:\n", " corr = result[\"unit_results\"][p][\"outputs\"][\"standard_state_correction\"]\n", - " dgs[name][\"standard_state_correction\"].append([corr, 0*unit.kilocalorie_per_mole])\n", + " # In openfe v1.9+, standard state corrections are set to 0 kcal/mol\n", + " # when no correction is being applied (e.g. no restraints).\n", + " # To make raw outputs similar to pre-v1.9, we exclude corrections\n", + " # if they are close to 0.\n", + " if not np.isclose(corr.m, 0):\n", + " dgs[name][\"standard_state_correction\"].append(\n", + " [corr, 0 * unit.kilocalorie_per_mole]\n", + " )\n", " else:\n", " continue\n", - "\n", - " return dgs" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "08afcbcf-34a8-4450-a967-eb4ed5800ee1", - "metadata": {}, - "outputs": [], - "source": [ - "def _get_name(result:dict) -> str:\n", - " \"\"\"Get the ligand name from a unit's results data.\n", - "\n", - " Parameters\n", - " ----------\n", - " result : dict\n", - " A results dict.\n", - "\n", - " Returns\n", - " -------\n", - " str\n", - " Ligand name corresponding to the results.\n", - " \"\"\"\n", - " try:\n", - " nm = list(result['unit_results'].values())[0]['name']\n", - "\n", - " except KeyError:\n", - " raise ValueError(\"Failed to guess name\")\n", - "\n", - " toks = nm.split('Binding, ')\n", - " if 'solvent' in toks[1]:\n", - " name = toks[1].split(' solvent')[0]\n", - " if 'complex' in toks[1]:\n", - " name = toks[1].split(' complex')[0]\n", - " return name" + " return dgs\n" ] }, { "cell_type": "code", "execution_count": 6, - "id": "a5d4aef3-1fab-40b4-848c-d59df8ee1441", + "id": "0621e3a2-7906-4661-a640-c414456f8869", "metadata": {}, "outputs": [], "source": [ @@ -254,16 +256,8 @@ " \"\"\"\n", " Calculate the error of the estimate as the std of the repeats\n", " \"\"\"\n", - " return np.std([v[0].m for v in r[\"overall\"]])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "0621e3a2-7906-4661-a640-c414456f8869", - "metadata": {}, - "outputs": [], - "source": [ + " return np.std([v[0].m for v in r[\"overall\"]])\n", + " \n", "def _error_mbar(r):\n", " \"\"\"\n", " Calculate the error of the estimate using the reported MBAR errors.\n", @@ -291,7 +285,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "5821b7f7-6aed-4138-9502-44df3af52647", "metadata": {}, "outputs": [], @@ -304,7 +298,7 @@ "\n", " Parameters\n", " ----------\n", - " results_files : list[ps.PathLike | str]\n", + " results_files : list[os.PathLike]\n", " A list of directors with ABFE result files to process.\n", "\n", " Returns\n", @@ -314,6 +308,7 @@ " \"\"\"\n", " # find and filter result jsons\n", " result_fns = _collect_result_jsons(results_files)\n", + "\n", " # pair legs of simulations together into dict of dicts\n", " sim_results = _get_legs_from_result_jsons(result_fns)\n", "\n", @@ -322,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "e021b2ea-db13-4e47-a6d1-cf5229b5b494", "metadata": {}, "outputs": [], @@ -363,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "b077c4e2-373a-4314-a527-186bb4683262", "metadata": {}, "outputs": [], @@ -418,35 +413,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "7bc49c0e-6fec-409c-a01c-42c35f57dcc6", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ialibay/software/mambaforge/install/envs/openfe/lib/python3.12/site-packages/openmoltools/utils.py:9: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", - " from pkg_resources import resource_filename\n", - "/home/ialibay/software/mambaforge/install/envs/openfe/lib/python3.12/site-packages/Bio/Application/__init__.py:39: BiopythonDeprecationWarning: The Bio.Application modules and modules relying on it have been deprecated.\n", - "\n", - "Due to the on going maintenance burden of keeping command line application\n", - "wrappers up to date, we have decided to deprecate and eventually remove these\n", - "modules.\n", - "\n", - "We instead now recommend building your command line and invoking it directly\n", - "with the subprocess module.\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "# Specify paths to result directories\n", - "results_dir = [\n", - " pathlib.Path(\"abfe_results/results_0\"),\n", - " pathlib.Path(\"abfe_results/results_1\"),\n", - " pathlib.Path(\"abfe_results/results_2\"),\n", - "]\n", + "results_dir = [pathlib.Path(\"abfe_results/abfe_results_multiple_units\")]\n", "dgs = extract_results_dict(results_dir)" ] }, @@ -463,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "46996a74-709c-41f2-ac39-0f77fb33371e", "metadata": {}, "outputs": [], @@ -474,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "d1a6ad61-1e5a-4d8a-9067-9ed428ef145c", "metadata": {}, "outputs": [ @@ -508,8 +481,8 @@ " \n", " 0\n", " 1\n", - " -18.36\n", - " 0.98\n", + " -20.87\n", + " 0.47\n", " \n", " \n", "\n", @@ -517,10 +490,10 @@ ], "text/plain": [ " ligand DG (kcal/mol) uncertainty (kcal/mol)\n", - "0 1 -18.36 0.98" + "0 1 -20.87 0.47" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -540,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "8b2c1dd8-ffa3-4585-94a7-4a1ed1454f30", "metadata": {}, "outputs": [], @@ -551,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "08b72901-9c71-460b-b5da-9fb4a35e07f7", "metadata": {}, "outputs": [ @@ -587,43 +560,43 @@ " 0\n", " complex\n", " 1\n", - " 36.87\n", - " 0.36\n", + " 36.34\n", + " 0.91\n", " \n", " \n", " 1\n", " complex\n", " 1\n", - " 39.24\n", - " 0.44\n", + " 36.17\n", + " 0.80\n", " \n", " \n", " 2\n", " complex\n", " 1\n", - " 37.46\n", - " 0.48\n", + " 34.77\n", + " 0.81\n", " \n", " \n", " 3\n", " solvent\n", " 1\n", - " 10.57\n", - " 0.66\n", + " 6.5\n", + " 1.4\n", " \n", " \n", " 4\n", " solvent\n", " 1\n", - " 10.48\n", - " 0.65\n", + " 5.4\n", + " 1.4\n", " \n", " \n", " 5\n", " solvent\n", " 1\n", - " 10.41\n", - " 0.66\n", + " 5.5\n", + " 1.4\n", " \n", " \n", " 6\n", @@ -636,7 +609,7 @@ " 7\n", " standard_state_correction\n", " 1\n", - " -9.1\n", + " -9.3\n", " 0.0\n", " \n", " \n", @@ -652,18 +625,18 @@ ], "text/plain": [ " leg ligand DG (kcal/mol) uncertainty (kcal/mol)\n", - "0 complex 1 36.87 0.36\n", - "1 complex 1 39.24 0.44\n", - "2 complex 1 37.46 0.48\n", - "3 solvent 1 10.57 0.66\n", - "4 solvent 1 10.48 0.65\n", - "5 solvent 1 10.41 0.66\n", + "0 complex 1 36.34 0.91\n", + "1 complex 1 36.17 0.80\n", + "2 complex 1 34.77 0.81\n", + "3 solvent 1 6.5 1.4\n", + "4 solvent 1 5.4 1.4\n", + "5 solvent 1 5.5 1.4\n", "6 standard_state_correction 1 -8.9 0.0\n", - "7 standard_state_correction 1 -9.1 0.0\n", + "7 standard_state_correction 1 -9.3 0.0\n", "8 standard_state_correction 1 -9.0 0.0" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -671,6 +644,14 @@ "source": [ "df_raw" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf160a28-3aa1-4661-8f06-3a4a71d34ff2", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -689,7 +670,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.11" + "version": "3.12.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": {