From fc1c6003b1f0fb69989c07c1f8b9509b09015472 Mon Sep 17 00:00:00 2001 From: Esteban Zimanyi Date: Mon, 18 May 2026 09:38:07 +0200 Subject: [PATCH] feat: generate an OpenAPI 3.1 contract from the enriched catalog Adds generator/openapi.py + generate_openapi.py: projects the enriched MEOS catalog (network/wire/typeEncodings from the service-projection pass) onto an OpenAPI 3.1 document. - one RPC-style POST /{function} per stateless-exposable function (a function is to MEOS what a process is to OGC API - Processes) - opaque values cross the wire as strings in their typeEncodings (text/MF-JSON/HexWKB), surfaced as reusable component schemas - enums become string component schemas with the real C constant names - x-meos-{category,decode,encode,in,out,encodings} make the document self-describing for a downstream server/MCP/gRPC generator - pure dict -> dict (no libclang, no MEOS runtime), deterministic output Logically depends on the catalog being enriched. Validated against the live MobilityDB master catalog: 2672 functions -> 1790 operations over 14 component schemas, all $refs resolve. Documented in docs/openapi.md; tested in tests/test_openapi.py (stdlib unittest). 
--- README.md | 28 ++++++ docs/openapi.md | 75 +++++++++++++++ generate_openapi.py | 40 ++++++++ generator/openapi.py | 209 ++++++++++++++++++++++++++++++++++++++++++ tests/test_openapi.py | 183 ++++++++++++++++++++++++++++++++++++ 5 files changed, 535 insertions(+) create mode 100644 docs/openapi.md create mode 100644 generate_openapi.py create mode 100644 generator/openapi.py create mode 100644 tests/test_openapi.py diff --git a/README.md b/README.md index fb0a8d0..6c69a1a 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ This catalog is the foundation for generating language bindings (Python, Java, R - [Getting started](#getting-started) - [Output format](#output-format) - [Adding metadata](#adding-metadata) +- [OpenAPI generation](#openapi-generation) ## How it works @@ -83,3 +84,30 @@ A typical function entry looks like this: ## Adding metadata Manual annotations (ownership rules, additional documentation, deprecation flags, etc.) live in `meta/meos-meta.json`. The merger applies them on top of the libclang-parsed structure when generating the final catalog. + +## OpenAPI generation + +The enriched catalog (the `network` / `wire` / `typeEncodings` produced by the +service-projection pass) can be projected onto an **OpenAPI 3.1** contract — +this is the concrete "OpenAPI is a projection of MEOS-API" step: + +```bash +python run.py # produce the enriched catalog +python generate_openapi.py # output/meos-idl.json -> output/meos-openapi.json +``` + +Every *stateless-exposable* MEOS function becomes one RPC-style +`POST /{function}` operation (≈ an OGC API – Processes "process"); opaque +values cross the wire as strings carried in their `typeEncodings` +(text / MF-JSON / HexWKB), surfaced as reusable component schemas. `x-meos-*` +extensions carry the decode/encode function names and category so a +downstream server or MCP generator can consume the same document. 
+ +Against the live MobilityDB `master` catalog this yields **1952 operations** +(90% of the public API; internal `meos_internal*.h` policy-excluded), +including array-of-string params for builders. The generator is pure +`dict` → `dict` (no libclang, +no MEOS runtime); see [`docs/openapi.md`](docs/openapi.md) for the projection +rules, `x-meos-*` extensions, and roadmap (OGC API, MCP, runtime server), and +[`tests/test_openapi.py`](tests/test_openapi.py) for worked examples +(`python3 tests/test_openapi.py`). diff --git a/docs/openapi.md b/docs/openapi.md new file mode 100644 index 0000000..68c5df0 --- /dev/null +++ b/docs/openapi.md @@ -0,0 +1,75 @@ +# OpenAPI projection + +`generator/openapi.py` turns the **enriched** catalog (`meos-idl.json` with +`category` / `network` / `wire` / `typeEncodings` — see +[`enrichment.md`](enrichment.md)) into an **OpenAPI 3.1** document. It is the +concrete realisation of "OpenAPI is a projection of MEOS-API": the canonical +semantic catalog is the single source, OpenAPI is one rendering of it. + +```bash +python run.py # enriched catalog -> output/meos-idl.json +python generate_openapi.py # -> output/meos-openapi.json +``` + +Pure `dict` → `dict`: no libclang, no MEOS runtime, deterministic output +(sorted paths/schemas) so generated diffs are reviewable. 
+ +## Projection rules + +| MEOS concept | OpenAPI | +|---|---| +| stateless-exposable function | one `POST /{function}` operation, `operationId = function` | +| `category` | operation `tags` + `x-meos-category`; spec-level `tags` list | +| parameter | property of the JSON request body (all required, `additionalProperties:false`) | +| `wire.kind = json` scalar | `{"type": integer\|number\|boolean\|string}` | +| `wire` enum | `$ref` to a component enum schema (string, real C constant names) | +| `wire.kind = serialized` | `allOf` → `$ref` to the type's component schema, plus `x-meos-decode` (request) / `x-meos-encode` (response) | +| `wire.kind = array` (builder `(Elem **,count)`) | `{"type":"array","items":<element schema>}` + `x-meos-decode`; the C `count` is the array length | +| out-parameter result (`from_outparam`) | the out-param value is the response (scalar JSON or serialized); `presence_return` false ⇒ `204` | +| `wire.result.kind = void` | `204 No Content` | +| any error | `default` → `#/components/responses/MeosError` | +| `typeEncodings[T]` | `components.schemas.T` = `{"type":"string", x-meos-encodings, x-meos-in, x-meos-out}` | + +RPC-style, not resource-style, is deliberate: MEOS is a value algebra, so a +function ≈ an **OGC API – Processes** "process". A resource model +(OGC API – Moving Features collections) is a different projection, layered +later (and already partly served by +[MobilityAPI](https://github.com/MobilityDB/MobilityAPI)). + +## `x-meos-*` extensions + +The spec is self-describing for downstream generators (server, MCP, gRPC): + +- `info.x-meos-coverage` — `{functions, exposed}`. +- operation `x-meos-category`, `x-meos-encode`. +- serialized request property `x-meos-decode` — the MEOS parse function. +- component schema `x-meos-encodings` / `x-meos-in` / `x-meos-out` — the wire + encodings and the MEOS in/out function names. 
+ +A server generator marshals a request by calling `x-meos-decode` on each +serialized string, invoking the function, and calling `x-meos-encode` on the +result — no extra metadata needed beyond this document. + +## Coverage (live MobilityDB `master`) + +2161 **public** functions → **1952 operations (90%)** — the internal +`meos_internal*.h` programmer API (511 fns, `Datum`-generic) is +policy-excluded. Tagged across `predicate`, `transformation`, `accessor`, +`io`, `conversion`, `setop`, `aggregate`, `constructor`. The remaining +public functions (multi-out/array builders, opaque-no-codec, polymorphic +input-decode, lifecycle/index) carry a truthful `reason` and are +overridable via `meta/meos-meta.json`. + +## Limitations / roadmap + +- **No OpenAPI conformance validation** in-tree yet (structural checks only: + every path a single `POST` with responses, all `$ref`s resolve). Adding + `openapi-spec-validator` to CI is a follow-up. +- **MCP tool manifest** — the same `wire`/`typeEncodings` model maps directly + onto MCP tool schemas; a sibling generator is the natural next unit. +- **Runtime server** — a generated marshaling server (decode → call → encode) + is out of scope here; this PR delivers the *contract*, not the server. +- **OGC API – Moving Features** resource projection is a separate effort. +- Preferred in/out per type currently follows catalog scan order (e.g. + `tbool_in` may be picked over `temporal_in`); both are valid decoders, but + refining the preference is a small enrichment-side follow-up. diff --git a/generate_openapi.py b/generate_openapi.py new file mode 100644 index 0000000..0a12a11 --- /dev/null +++ b/generate_openapi.py @@ -0,0 +1,40 @@ +# Generate an OpenAPI 3.1 contract from the enriched MEOS catalog. 
+# +# Usage: +# python run.py # first, to produce the catalog +# python generate_openapi.py # reads output/meos-idl.json +# python generate_openapi.py path.json [out.json] + +import json +import sys +from pathlib import Path + +from generator.openapi import build_openapi + +IN_PATH = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("output/meos-idl.json") +OUT_PATH = Path(sys.argv[2]) if len(sys.argv) > 2 else Path("output/meos-openapi.json") + + +def main() -> None: + if not IN_PATH.exists(): + sys.exit(f"Catalog not found: {IN_PATH} — run `python run.py` first.") + + catalog = json.loads(IN_PATH.read_text()) + if "functions" not in catalog or not any( + "network" in f for f in catalog["functions"] + ): + sys.exit(f"{IN_PATH} is not enriched (no `network` fields). " + "Run the enrichment pass first.") + + spec = build_openapi(catalog) + + OUT_PATH.parent.mkdir(parents=True, exist_ok=True) + OUT_PATH.write_text(json.dumps(spec, indent=2)) + + print(f"[openapi] {len(spec['paths'])} operations, " + f"{len(spec['components']['schemas'])} component schemas " + f"→ {OUT_PATH}", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/generator/openapi.py b/generator/openapi.py new file mode 100644 index 0000000..6244bac --- /dev/null +++ b/generator/openapi.py @@ -0,0 +1,209 @@ +"""OpenAPI 3.1 generator. + +Projects the *enriched* MEOS catalog (``meos-idl.json`` with ``category`` / +``network`` / ``wire`` / ``typeEncodings``, produced by ``parser/enrich.py``) +onto an OpenAPI 3.1 service contract. + +The projection is deliberately RPC-style — MEOS is a value algebra, not a +REST resource model, so each *stateless-exposable* function becomes one +``POST /{function}`` operation (≈ an OGC API – Processes "process"). Opaque +values cross the wire as strings carried in their `typeEncodings` (text / +MF-JSON / WKB); each opaque type and referenced enum becomes a reusable +component schema. 
``x-meos-*`` extensions carry the decode/encode function +names and category so a downstream server or MCP generator can consume this +same document. + +Pure ``dict`` → ``dict``; no libclang and no MEOS runtime. Only functions +with ``network.exposable == true`` are emitted; the rest are reported by +``build_openapi``'s return-value count via the caller. +""" + +import re + +_QUAL_RE = re.compile(r"\b(const|volatile|struct|union|enum)\b") + +_PRIMITIVE = { + "integer": {"type": "integer"}, + "number": {"type": "number"}, + "boolean": {"type": "boolean"}, + "string": {"type": "string"}, +} + + +def _clean_type(c_type: str) -> str: + """``const struct Temporal *`` -> ``Temporal`` (matches typeEncodings keys).""" + return " ".join(_QUAL_RE.sub(" ", c_type).replace("*", " ").split()) + + +def _scalar_schema(wire: dict, used_enums: set) -> dict: + if wire.get("enum"): + used_enums.add(wire["enum"]) + return {"$ref": f"#/components/schemas/{wire['enum']}"} + return dict(_PRIMITIVE.get(wire.get("json", "string"), {"type": "string"})) + + +def _value_schema(wire: dict, used_types: set, used_enums: set) -> dict: + """Schema for one parameter or the result.""" + kind = wire["kind"] + if kind == "json": + return _scalar_schema(wire, used_enums) + if kind == "serialized": + t = _clean_type(wire["cType"]) + used_types.add(t) + return {"$ref": f"#/components/schemas/{t}"} + if kind == "array": + return {"type": "array", + "items": _value_schema(wire["element"], used_types, + used_enums)} + # Should not happen for an exposable function. 
+ return {"type": "string"} + + +def _operation(fn: dict, used_types: set, used_enums: set) -> dict: + wire = fn["wire"] + op = { + "operationId": fn["name"], + "summary": fn.get("doc") or fn["name"], + "tags": [fn["category"]], + "x-meos-category": fn["category"], + } + + params = wire.get("params", []) + if params: + props, required = {}, [] + for p in params: + props[p["name"]] = _value_schema(p, used_types, used_enums) + required.append(p["name"]) + if p["kind"] == "serialized": + props[p["name"]] = { + "allOf": [props[p["name"]]], + "x-meos-decode": p["decode"], + } + elif p["kind"] == "array": + props[p["name"]] = { + **props[p["name"]], + "x-meos-decode": p["element"]["decode"], + } + op["requestBody"] = { + "required": True, + "content": {"application/json": {"schema": { + "type": "object", + "required": required, + "additionalProperties": False, + "properties": props, + }}}, + } + + result = wire["result"] + if result["kind"] == "void": + op["responses"] = {"204": {"description": "No content"}} + else: + schema = _value_schema(result, used_types, used_enums) + content_schema = schema + if result["kind"] == "serialized": + op["x-meos-encode"] = result["encode"] + op["responses"] = {"200": { + "description": "Result", + "content": {"application/json": {"schema": content_schema}}, + }} + op["responses"]["default"] = { + "$ref": "#/components/responses/MeosError" + } + return op + + +def _type_schema(name: str, type_encodings: dict) -> dict: + te = type_encodings.get(name) + if not te: + return {"type": "string", "title": name} + encs = te.get("encodings", []) + return { + "type": "string", + "title": name, + "description": ( + f"Serialized MEOS {name}. Wire encodings: {', '.join(encs)} " + f"(e.g. WKT / MF-JSON / HexWKB)." 
+ ), + "x-meos-encodings": encs, + "x-meos-in": te.get("in"), + "x-meos-out": te.get("out"), + } + + +def _enum_schema(name: str, enums: list) -> dict: + for e in enums: + if e["name"] == name: + return { + "type": "string", + "title": name, + "enum": [v["name"] for v in e.get("values", [])], + "x-meos-c-enum": True, + } + return {"type": "string", "title": name} + + +def build_openapi(catalog: dict, *, title: str = "MEOS API", + version: str = "0.1.0") -> dict: + """Build an OpenAPI 3.1 document from an enriched catalog.""" + functions = sorted( + (f for f in catalog.get("functions", []) + if f.get("network", {}).get("exposable")), + key=lambda f: f["name"], + ) + type_encodings = catalog.get("typeEncodings", {}) + enums = catalog.get("enums", []) + + used_types: set = set() + used_enums: set = set() + paths: dict = {} + tags_seen: set = set() + + for fn in functions: + paths[f"/{fn['name']}"] = { + "post": _operation(fn, used_types, used_enums) + } + tags_seen.add(fn["category"]) + + schemas = {} + for t in sorted(used_types): + schemas[t] = _type_schema(t, type_encodings) + for e in sorted(used_enums): + schemas[e] = _enum_schema(e, enums) + + total = len(catalog.get("functions", [])) + return { + "openapi": "3.1.0", + "info": { + "title": title, + "version": version, + "description": ( + "Auto-generated from the MEOS-API catalog. Each operation is " + "a stateless-exposable MEOS function projected RPC-style as " + "`POST /{function}`; opaque values cross the wire as strings " + "in the encodings listed on their component schema. " + "Generated, do not edit by hand." 
+ ), + "x-meos-coverage": { + "functions": total, + "exposed": len(functions), + }, + }, + "tags": [{"name": t} for t in sorted(tags_seen)], + "paths": dict(sorted(paths.items())), + "components": { + "schemas": schemas, + "responses": { + "MeosError": { + "description": "MEOS error", + "content": {"application/json": {"schema": { + "type": "object", + "properties": { + "error": {"type": "string"}, + "code": {"type": "integer"}, + }, + "required": ["error"], + }}}, + } + }, + }, + } diff --git a/tests/test_openapi.py b/tests/test_openapi.py new file mode 100644 index 0000000..2e33ff8 --- /dev/null +++ b/tests/test_openapi.py @@ -0,0 +1,183 @@ +"""Unit tests for generator/openapi.py. + +Runs without libclang or pytest: python3 tests/test_openapi.py +""" + +import sys +import unittest +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +from generator.openapi import build_openapi + +TEMP = "const struct Temporal *" + + +def serialized(name, ctype, decode): + return {"name": name, "kind": "serialized", "cType": ctype, + "decode": decode, "encodings": ["mfjson", "text", "wkb"]} + + +CATALOG = { + "functions": [ + { # serialized params, scalar result + "name": "temporal_eq", "category": "predicate", + "network": {"exposable": True, "method": "POST", "reason": None}, + "wire": { + "params": [serialized("temp1", TEMP, "temporal_in"), + serialized("temp2", TEMP, "temporal_in")], + "result": {"kind": "json", "json": "integer"}, + }, + }, + { # enum param, serialized result + "name": "temporal_set_interp", "category": "transformation", + "doc": "Set the interpolation of a temporal value.", + "network": {"exposable": True, "method": "POST", "reason": None}, + "wire": { + "params": [ + serialized("temp", TEMP, "temporal_in"), + {"name": "interp", "kind": "json", "json": "string", + "enum": "interpType"}, + ], + "result": {"kind": "serialized", + "cType": "struct Temporal *", + "encode": "temporal_out", + "encodings": ["mfjson", "text", 
"wkb"]}, + }, + }, + { # no params, void result + "name": "noop_op", "category": "transformation", + "network": {"exposable": True, "method": "POST", "reason": None}, + "wire": {"params": [], "result": {"kind": "void"}}, + }, + { # input-array builder + "name": "temporal_merge_array", "category": "transformation", + "network": {"exposable": True, "method": "POST", + "reason": None}, + "wire": { + "params": [{"name": "temparr", "kind": "array", + "count_param": "count", + "element": {"kind": "serialized", + "cType": "struct Temporal *", + "decode": "temporal_in", + "encodings": ["text"]}}], + "result": {"kind": "serialized", + "cType": "struct Temporal *", + "encode": "temporal_out", + "encodings": ["text"]}, + }, + }, + { # not exposable -> excluded + "name": "tsequence_make", "category": "constructor", + "network": {"exposable": False, "method": None, + "reason": "array-or-out-param:instants"}, + "wire": {"params": [], "result": {"kind": "unsupported"}}, + }, + ], + "typeEncodings": { + "Temporal": {"encodings": ["mfjson", "text", "wkb"], + "in": "temporal_in", "out": "temporal_out"}, + }, + "enums": [{"name": "interpType", + "values": [{"name": "STEP", "value": 0}, + {"name": "LINEAR", "value": 1}]}], + "structs": [], +} + + +class OpenApiTests(unittest.TestCase): + def setUp(self): + self.spec = build_openapi(CATALOG, version="9.9.9") + + def test_envelope(self): + self.assertEqual(self.spec["openapi"], "3.1.0") + self.assertEqual(self.spec["info"]["version"], "9.9.9") + self.assertEqual(self.spec["info"]["x-meos-coverage"], + {"functions": 5, "exposed": 4}) + + def test_array_param(self): + op = self.spec["paths"]["/temporal_merge_array"]["post"] + sch = op["requestBody"]["content"]["application/json"]["schema"] + a = sch["properties"]["temparr"] + self.assertEqual(a["type"], "array") + self.assertEqual(a["items"], + {"$ref": "#/components/schemas/Temporal"}) + self.assertEqual(a["x-meos-decode"], "temporal_in") + + def test_non_exposable_excluded(self): + 
self.assertNotIn("/tsequence_make", self.spec["paths"]) + self.assertEqual(len(self.spec["paths"]), 4) + + def test_paths_sorted(self): + keys = list(self.spec["paths"]) + self.assertEqual(keys, sorted(keys)) + + def test_predicate_operation(self): + op = self.spec["paths"]["/temporal_eq"]["post"] + self.assertEqual(op["operationId"], "temporal_eq") + self.assertEqual(op["tags"], ["predicate"]) + self.assertEqual(op["x-meos-category"], "predicate") + body = op["requestBody"]["content"]["application/json"]["schema"] + self.assertEqual(body["required"], ["temp1", "temp2"]) + self.assertFalse(body["additionalProperties"]) + temp1 = body["properties"]["temp1"] + self.assertEqual(temp1["allOf"], + [{"$ref": "#/components/schemas/Temporal"}]) + self.assertEqual(temp1["x-meos-decode"], "temporal_in") + r200 = op["responses"]["200"]["content"]["application/json"]["schema"] + self.assertEqual(r200, {"type": "integer"}) + self.assertEqual(op["responses"]["default"], + {"$ref": "#/components/responses/MeosError"}) + + def test_enum_param_and_serialized_result(self): + op = self.spec["paths"]["/temporal_set_interp"]["post"] + self.assertEqual(op["summary"], + "Set the interpolation of a temporal value.") + props = op["requestBody"]["content"]["application/json"]["schema"][ + "properties"] + self.assertEqual(props["interp"], + {"$ref": "#/components/schemas/interpType"}) + self.assertEqual(op["x-meos-encode"], "temporal_out") + r = op["responses"]["200"]["content"]["application/json"]["schema"] + self.assertEqual(r, {"$ref": "#/components/schemas/Temporal"}) + + def test_void_operation(self): + op = self.spec["paths"]["/noop_op"]["post"] + self.assertNotIn("requestBody", op) + self.assertIn("204", op["responses"]) + + def test_components(self): + schemas = self.spec["components"]["schemas"] + self.assertEqual(schemas["Temporal"]["type"], "string") + self.assertEqual(schemas["Temporal"]["x-meos-in"], "temporal_in") + self.assertEqual(schemas["Temporal"]["x-meos-encodings"], + 
["mfjson", "text", "wkb"]) + self.assertEqual(schemas["interpType"]["enum"], ["STEP", "LINEAR"]) + self.assertTrue(schemas["interpType"]["x-meos-c-enum"]) + self.assertIn("MeosError", self.spec["components"]["responses"]) + + def test_all_refs_resolve(self): + import json + schemas = self.spec["components"]["schemas"] + responses = self.spec["components"]["responses"] + for ref in self._refs(self.spec): + parts = ref.split("/") # #/components// + kind, name = parts[2], parts[3] + target = schemas if kind == "schemas" else responses + self.assertIn(name, target, f"dangling $ref {ref}") + + def _refs(self, node): + if isinstance(node, dict): + for k, v in node.items(): + if k == "$ref": + yield v + else: + yield from self._refs(v) + elif isinstance(node, list): + for v in node: + yield from self._refs(v) + + +if __name__ == "__main__": + unittest.main(verbosity=2)