diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 0000000..023bb1e --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,67 @@ +name: Build and Push Docker Image + +on: + push: + branches: [ "main" ] + tags: [ 'v*.*.*' ] + pull_request: + branches: [ "main" ] + +env: + REGISTRY: ghcr.io + # '<owner>/<repo>' exactly as on GitHub (e.g. 'MPoL-dev/examples'); it is NOT lowercased here. NOTE(review): relies on docker/metadata-action lowercasing the image name for GHCR - confirm + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Sets up QEMU emulation; not exercised while only linux/amd64 is built, kept so other platforms can be added later + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + # Sets up Docker Buildx (required for multi-arch builds and advanced caching) + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # Log in to GitHub Container Registry (skipped on pull requests, which only build and never push) + - name: Log in to the Container Registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract metadata (tags, labels) for Docker + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=semver,pattern={{version}} + type=sha,format=short + + # Build and push Docker image + - name: Build and push Docker image + uses: docker/build-push-action@v6 + with: + context: ./casatools-env + file: ./casatools-env/Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + # Build amd64 only: the Dockerfile pins `FROM --platform=linux/amd64`, so an arm64 variant would just be an amd64 image published under the wrong platform label + platforms: linux/amd64 + # Utilizing GitHub Actions native caching dramatically speeds up subsequent builds + 
cache-from: type=gha + cache-to: type=gha,mode=max \ No newline at end of file diff --git a/00-download-and-extract-datasets/README.md b/00-download-and-extract-datasets/README.md new file mode 100644 index 0000000..c9a3930 --- /dev/null +++ b/00-download-and-extract-datasets/README.md @@ -0,0 +1,19 @@ +# Download and extract datasets + +# Installation and running + +In this 'example,' one merely downloads and extracts the ALMA datasets to a common data format like `.npz` or `.asdf`. This step requires the `casatools` package, which frequently has restrictions on Python versions and installation environments. + +To simplify this step for users of the tutorials, one can use our Docker container via +``` +./run.sh +``` + +Depending on the speed of your internet connection, it may take some time to download the several-GB measurement sets from the archive servers. + +Upon successful completion, you should see the following items in your directory: + +* `data/IM_Lup_baselines_and_weights.npz` +* `data/IM_Lup_baseline_plot.png` + +Alternatively, if one already has their own Python environment compatible with `casatools`, one can install the [relevant packages](../casatools-env/requirements.txt) into that environment. 
\ No newline at end of file diff --git a/00-download-and-extract-datasets/Snakefile b/00-download-and-extract-datasets/Snakefile new file mode 100644 index 0000000..a65f219 --- /dev/null +++ b/00-download-and-extract-datasets/Snakefile @@ -0,0 +1,24 @@ +rule all: + input: + "data/IM_Lup_baselines_and_weights.npz", + "data/IM_Lup_baseline_plot.png" + +rule download_ms: + output: temp("data/IMLup_continuum.ms.tgz") + shell: "wget https://almascience.eso.org/almadata/lp/DSHARP/MSfiles/IMLup_continuum.ms.tgz --directory-prefix=data/" + +rule untar_and_rename: + input: "data/IMLup_continuum.ms.tgz" + output: directory("data/IM_Lup.ms") + shell: + "tar -xf {input} -C data/ --no-same-owner && " + "mv data/IMLup_continuum.ms {output}" + +# only uu and vv that have *all channels unflagged* +# only 1 channel is taken from those available: [1, 8, 16] +# 0.05 taken randomly from those +# saved using float32 +rule export_baselines: + input: "data/IM_Lup.ms" + output: npz="data/IM_Lup_baselines_and_weights.npz", plot="data/IM_Lup_baseline_plot.png" + shell: "python export_baselines.py {input} {output.npz} {output.plot} --select_fraction 0.05" diff --git a/01-generate-mock-baselines/export_baselines.py b/00-download-and-extract-datasets/export_baselines.py similarity index 100% rename from 01-generate-mock-baselines/export_baselines.py rename to 00-download-and-extract-datasets/export_baselines.py diff --git a/00-download-and-extract-datasets/run.sh b/00-download-and-extract-datasets/run.sh new file mode 100755 index 0000000..80bdc4a --- /dev/null +++ b/00-download-and-extract-datasets/run.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Docker image that provides casatools and snakemake. Defaults to the locally +# built `casatools-env` tag; set CASATOOLS_IMAGE to run a different image +# (e.g., one pulled from the GitHub Container Registry). +IMAGE="${CASATOOLS_IMAGE:-casatools-env}" + +# Mount the current directory at /workspace so that snakemake reads the +# Snakefile from, and writes its outputs to, the host directory. +docker run --rm --platform linux/amd64 \ + -v "$(pwd)":/workspace \ + "$IMAGE" \ + snakemake -c1 all \ No newline at end of file diff --git a/README.md b/README.md index c491754..a384eca 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,18 @@ # MPoL Examples -This repository hosts self-contained examples demonstrating 
[MPoL](https://mpol-dev.github.io/MPoL/) functionality. More info on each example can be found in the README.md within each example folder. +This repository hosts self-contained examples demonstrating [MPoL](https://mpol-dev.github.io/MPoL/) functionality. More info on each example can be found in the README.md within each example folder. This repository is *not* continuously integrated with the rest of the codebase because the computational demands are too significant. If you do encounter an error, please log it as a [GitHub issue](https://github.com/MPoL-dev/examples/issues). -This repository is *not* continuously integrated with the rest of the codebase, because the computational demands are too significant. If you do encounter an error, please log it as a [GitHub issue](https://github.com/MPoL-dev/examples/issues). +# Installing and running the examples -# Installing and Running the Examples +These examples strive to use real, or approximately real, data to demonstrate MPoL functionality. Unfortunately, using real ALMA data means that we must contend with the fact that the Python `casatools` package frequently lags supported [SPEC 0](https://scientific-python.org/specs/spec-0000/) Python versions. This creates a situation where one needs both a deprecated Python version old enough to run `casatools` and a current Python version new enough to support modern development. -Each example assumes you have activated a (virtual) python environment to which you've [successfully installed the MPoL package](https://mpol-dev.github.io/MPoL/installation.html), and that your version of python is at least the minimum current version supported by MPoL. +## The initial 00 example +We've isolated the `casatools` dependency to the initial dataset download and extraction in the `00-download-and-extract-datasets` example. In that folder, you will need to install `casatools` into an older Python version that still supports it (e.g., 3.10). 
([More Info](./00-download-and-extract-datasets/README.md)) -To run an example, `cd` into that subfolder. +## All other examples +Once you have completed the 00 example and extracted the ALMA datasets to either `.npz` or `.asdf` format, you can then copy these data products into the `01-...` or `02-...` example folders that require them. In those later example folders, it is assumed that you have activated a (virtual) Python environment to which you've [successfully installed the MPoL package](https://mpol-dev.github.io/MPoL/installation.html), and that your version of Python is at least the minimum current version supported by MPoL. -The `requirements.txt` file lists the additional python packages necessary for the analysis specific to the example in that folder. You can install them with +Each folder will have a `requirements.txt` file that lists the additional Python packages necessary for the analysis specific to the example in that folder. You can install them with ``` pip install -r requirements.txt ``` @@ -22,10 +24,9 @@ $ snakemake -c 1 all ``` and you should see all scripts execute in order. - -# Table of Contents -* [01 - **Intro**: Setup Mock Image and Baselines](01-generate-mock-baselines/README.md) | Generate a mock sky image $I_\nu(l,m)$ and interferometer baselines $(u,v)$ (but not visibilities $\mathcal{V}(u,v)$). These products are used as input for the other examples. +# List of Examples +* [00 - **Setup**: Download and Extract Datasets](00-download-and-extract-datasets/README.md) | Download a few calibrated ALMA measurement sets and use `casatools` and `visread` to extract the visibilities to a common format like `.npz` or `.asdf`. +* [01 - **Intro**: Setup Mock Image and Baselines](01-generate-mock-baselines/README.md) | Generate a mock sky image $I_\nu(l,m)$ and interferometer baselines $(u,v)$ (but not the visibilities). These products are used as input for the other examples. 
* [02 - **Intro**: Stochastic Gradient Descent](02-sgd/README.md) | A complete end-to-end example using MPoL to image mock data. * [03 - **Advanced**: Visibility Inference with Pyro](03-AS209-pyro-inference/README.md) | Use MPoL with Pyro to sample parametric visibility plane models. -* [04 - **Advanced**: IM Lup protoplanetary disk](04-IMLup-multi-EB) | Use MPoL to image the ALMA DSHARP observations of the IM Lup protoplanetary disk, taking into account alignment and weight-scaling adjustments for a multi-execution block dataset. - +* [04 - **Advanced**: IM Lup protoplanetary disk](04-IMLup-multi-EB) | Use MPoL to image the ALMA DSHARP observations of the IM Lup protoplanetary disk, taking into account alignment and weight-scaling adjustments for a multi-execution block dataset. \ No newline at end of file diff --git a/casatools-env/Dockerfile b/casatools-env/Dockerfile new file mode 100644 index 0000000..7ebdd90 --- /dev/null +++ b/casatools-env/Dockerfile @@ -0,0 +1,44 @@ +# casatools-env/Dockerfile +# need to force the platform here so that casatools is available for the right +# architecture. Doesn't work with Apple Silicon. 
+FROM --platform=linux/amd64 ubuntu:24.04 + +ENV DEBIAN_FRONTEND=noninteractive + +# Install core system dependencies +RUN apt-get update && apt-get install -y \ + python3.12 \ + python3.12-dev \ + python3.12-venv \ + python3-pip-whl \ + gfortran \ + libgomp1 \ + wget \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace + +RUN python3.12 -m venv /opt/casa_venv + +# Add the virtual environment binaries to the container path +# This automatically "activates" the venv +ENV PATH="/opt/casa_venv/bin:$PATH" + +# Upgrade pip inside the virtual environment +RUN pip install --no-cache-dir --upgrade pip setuptools wheel + +# Copy and install requirements +COPY requirements.txt /workspace/requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +# create casa data directory +# https://casadocs.readthedocs.io/en/stable/notebooks/external-data.html +RUN mkdir -p /root/.casa/data +# Keep the site config outside /workspace: 00-download-and-extract-datasets/run.sh bind-mounts the host directory over /workspace, which would hide a config file stored there +COPY casasiteconfig.py /opt/casa/casasiteconfig.py +ENV CASASITECONFIG="/opt/casa/casasiteconfig.py" + +# Trigger the initial measures data download inside the container build, +# so that this isn't redone each time the user runs the container +# https://casadocs.readthedocs.io/en/stable/api/casaconfig.html#casasiteconfig-py +RUN python3 -c "from casaconfig import pull_data; pull_data('/root/.casa/data')" \ No newline at end of file diff --git a/casatools-env/README.md b/casatools-env/README.md new file mode 100644 index 0000000..e80e9ab --- /dev/null +++ b/casatools-env/README.md @@ -0,0 +1,16 @@ +# Docker image with casatools + +This folder contains the instructions and Dockerfile for how to build a Docker container that will run `casatools`, necessary for the `00-download-and-extract-datasets` example. In the normal course of events, this container is built automatically as part of the GitHub Actions workflow, and you can download it from the GitHub Container Registry following the instructions in the [00 example](../00-download-and-extract-datasets/README.md). 
+The following instructions are aimed at developers of the examples. It is assumed that you have already installed a [Docker environment](https://www.docker.com/), such as Docker Desktop. + +Build the container locally from within this directory and tag it as `casatools-env`: +``` +docker build --platform linux/amd64 -t casatools-env . +``` +Note that `--platform linux/amd64` is necessary to accommodate the modular casa packages, which to my understanding are not built for the Apple Silicon (arm64) architecture. + +Run the container and enter a bash shell: +``` +docker run --rm -it --platform linux/amd64 casatools-env +``` \ No newline at end of file diff --git a/casatools-env/build_locally.sh b/casatools-env/build_locally.sh new file mode 100755 index 0000000..4c7fae3 --- /dev/null +++ b/casatools-env/build_locally.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +# note: you normally do not need to run this script, because the container +# should already be built by GitHub workflows and available via the +# GitHub Container Registry. +# However, if you want to build the container locally, then use this +docker build --platform linux/amd64 -t casatools-env . \ No newline at end of file diff --git a/casatools-env/casasiteconfig.py b/casatools-env/casasiteconfig.py new file mode 100644 index 0000000..b6bb945 --- /dev/null +++ b/casatools-env/casasiteconfig.py @@ -0,0 +1,18 @@ +# https://casadocs.readthedocs.io/en/stable/api/casaconfig.html#casasiteconfig-py +# An example site config file. +# Place this in a location checked by casaconfig: +# /opt/casa/casasiteconfig.py +# /home/casa/casasiteconfig.py +# the environment value CASASITECONFIG - use the fully qualified path +# anywhere in the python path, e.g. 
the site-packages directory in the CASA being used + +# measurespath must match the directory that the Dockerfile creates and fills via casaconfig.pull_data() + +# Set this to point to the location where the site maintained casarundata can be found +# by default datapath will include measurespath + +measurespath = "/root/.casa/data" + +# turn off all auto updates of data; the data is pulled once while the image is built +measures_auto_update = False +data_auto_update = False \ No newline at end of file diff --git a/casatools-env/requirements.txt b/casatools-env/requirements.txt new file mode 100644 index 0000000..b120689 --- /dev/null +++ b/casatools-env/requirements.txt @@ -0,0 +1,5 @@ +casatools +casadata +numpy +snakemake +visread[casa] \ No newline at end of file