diff --git a/CLAUDE.md b/CLAUDE.md index d52cbce..4b811f8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -26,7 +26,7 @@ Two active binaries under `cmd/`: | Service | Port | Role | |---|---|---| | `cmd/api` | 8080 | REST API — reads/writes places and accessibility data | -| `cmd/ingestion` | — | Placeholder — future OpenStreetMap sync | +| `cmd/ingestion` | — | Batch worker: reads OSM .osm.pbf files, filters POIs, upserts to the places table | **Data flow:** 1. Client submits accessibility data via API @@ -53,6 +53,15 @@ The API is a pure data layer. It stores and returns accessibility facts; it neve **`internal/geo`** — PostGIS spatial queries (`ST_DWithin`, `ST_MakeEnvelope`) +**`internal/osm`** — OpenStreetMap ingestion: +- `Evaluate(tags)` — allowlist-based POI filter; returns matched category or excludes +- `TransformNode(...)` — converts an OSM node into a `models.Place` +- `DeriveRank(...)` — maps category + tags to RankLandmark / RankEstablishment / RankFeature +- `StreamNodes(ctx, r, sink)` — streams OSM nodes from a `.osm.pbf` reader via paulmach/osm + +**`internal/place`** — Repository for the places table: +- `UpsertBatch(ctx, places)` — bulk insert/update using `(osm_id, osm_type)` conflict key + **`internal/db`** — GORM setup, AutoMigrate, PostGIS spatial index creation ## Patterns diff --git a/cmd/ingestion/config.go b/cmd/ingestion/config.go new file mode 100644 index 0000000..17d9315 --- /dev/null +++ b/cmd/ingestion/config.go @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package main + +import ( + "fmt" + "strconv" + "strings" +) + +const defaultDBPort = 5432 + +// config holds the configuration for cmd/ingestion. All values come from environment variables. +type config struct { + DBHost string + DBPort int + DBUser string + DBPassword string + DBName string + DBSSLMode string + OSMPBFPath string +} + +// loadConfig reads configuration from the given map (typically os.Environ wrapped as a map). +// Required variables that are absent or malformed produce a single error listing every problem. +func loadConfig(env map[string]string) (config, error) { + var errs []string + + osmPath := env["OSM_PBF_PATH"] + if strings.TrimSpace(osmPath) == "" { + errs = append(errs, "OSM_PBF_PATH is required but was not set") + } + + port := defaultDBPort + if raw, ok := env["DB_PORT"]; ok && raw != "" { + parsed, err := strconv.Atoi(raw) + if err != nil { + errs = append(errs, fmt.Sprintf("DB_PORT must be an integer, got: %q", raw)) + } else { + port = parsed + } + } + + if len(errs) > 0 { + return config{}, fmt.Errorf("invalid configuration:\n - %s", strings.Join(errs, "\n - ")) + } + + return config{ + DBHost: valueOr(env, "DB_HOST", "localhost"), + DBPort: port, + DBUser: valueOr(env, "DB_USER", "postgres"), + DBPassword: valueOr(env, "DB_PASSWORD", "postgres"), + DBName: valueOr(env, "DB_NAME", "inwheel"), + DBSSLMode: valueOr(env, "DB_SSLMODE", "disable"), + OSMPBFPath: osmPath, + }, nil +} + +func valueOr(env map[string]string, key, fallback string) string { + if v, ok := env[key]; ok && v != "" { + return v + } + return fallback +} diff --git a/cmd/ingestion/config_test.go b/cmd/ingestion/config_test.go new file mode 100644 index 0000000..937d32c --- /dev/null +++ b/cmd/ingestion/config_test.go @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package main + +import ( + "strings" + "testing" +) + +func TestLoadConfig_AllValuesProvided(t *testing.T) { + env := map[string]string{ + "DB_HOST": "db.example.com", + "DB_PORT": "6543", + "DB_USER": "inwheel_user", + "DB_PASSWORD": "s3cret", + "DB_NAME": "inwheel_prod", + "DB_SSLMODE": "require", + "OSM_PBF_PATH": "/data/finland.osm.pbf", + } + + cfg, err := loadConfig(env) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if cfg.DBHost != "db.example.com" || cfg.DBPort != 6543 || cfg.DBUser != "inwheel_user" { + t.Errorf("unexpected DB config: %+v", cfg) + } + if cfg.OSMPBFPath != "/data/finland.osm.pbf" { + t.Errorf("OSM_PBF_PATH not honored: %q", cfg.OSMPBFPath) + } +} + +func TestLoadConfig_AppliesDefaults(t *testing.T) { + env := map[string]string{"OSM_PBF_PATH": "/tmp/a.pbf"} + + cfg, err := loadConfig(env) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.DBHost != "localhost" || cfg.DBPort != 5432 || cfg.DBUser != "postgres" || + cfg.DBPassword != "postgres" || cfg.DBName != "inwheel" || cfg.DBSSLMode != "disable" { + t.Errorf("defaults not applied: %+v", cfg) + } +} + +func TestLoadConfig_MissingOSMPath(t *testing.T) { + _, err := loadConfig(map[string]string{}) + if err == nil { + t.Fatal("expected error for missing OSM_PBF_PATH, got nil") + } + if !strings.Contains(err.Error(), "OSM_PBF_PATH") { + t.Errorf("error should mention OSM_PBF_PATH: %v", err) + } +} + +func TestLoadConfig_MalformedPort(t *testing.T) { + env := map[string]string{ + "OSM_PBF_PATH": "/tmp/a.pbf", + "DB_PORT": "not-a-number", + } + _, err := loadConfig(env) + if err == nil { + t.Fatal("expected error for malformed DB_PORT, got nil") + } + if !strings.Contains(err.Error(), "DB_PORT") { + t.Errorf("error should mention DB_PORT: %v", err) + } +} + +func TestLoadConfig_AccumulatesMultipleErrors(t *testing.T) { + env := map[string]string{"DB_PORT": "bad"} + + _, err := loadConfig(env) + if err == nil { + t.Fatal("expected error, got nil") + } + msg := err.Error() + if !strings.Contains(msg, "OSM_PBF_PATH") || !strings.Contains(msg, "DB_PORT") { + t.Errorf("expected both errors, got: %v", err) + } +} diff --git a/cmd/ingestion/main.go b/cmd/ingestion/main.go new file mode 100644 index 0000000..9678e10 --- /dev/null +++ b/cmd/ingestion/main.go @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package main + +import ( + "context" + "fmt" + "log/slog" + "os" + + "github.com/InWheelOrg/inwheel-api/internal/db" + "github.com/InWheelOrg/inwheel-api/internal/osm" + "github.com/InWheelOrg/inwheel-api/internal/place" + "github.com/InWheelOrg/inwheel-api/pkg/models" +) + +const batchSize = 1000 + +func main() { + slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))) + + if len(os.Args) < 2 { + fmt.Fprintln(os.Stderr, "usage: inwheel-ingestion ") + os.Exit(1) + } + + cfg, err := loadConfig(environAsMap()) + if err != nil { + slog.Error("config load failed", "error", err) + os.Exit(1) + } + slog.Info("config loaded", + "db.host", cfg.DBHost, + "db.port", cfg.DBPort, + "db.user", cfg.DBUser, + "db.name", cfg.DBName, + "db.sslmode", cfg.DBSSLMode, + "osm.pbf_path", cfg.OSMPBFPath, + ) + + switch os.Args[1] { + case "full-import": + if err := runFullImport(context.Background(), cfg); err != nil { + slog.Error("full-import failed", "error", err) + os.Exit(1) + } + case "diff-sync": + slog.Error("diff-sync not implemented yet") + os.Exit(1) + default: + fmt.Fprintf(os.Stderr, "unknown command: %s\n", os.Args[1]) + os.Exit(1) + } +} + +func environAsMap() map[string]string { + out := make(map[string]string, len(os.Environ())) + for _, kv := range os.Environ() { + for i := 0; i < len(kv); i++ { + if kv[i] == '=' { + out[kv[:i]] = kv[i+1:] + break + } + } + } + return out +} + +func runFullImport(ctx context.Context, cfg config) error { + gormDB, err := db.Connect(db.Config{ + Host: cfg.DBHost, + Port: cfg.DBPort, + User: cfg.DBUser, + Password: cfg.DBPassword, + Name: cfg.DBName, + SSLMode: cfg.DBSSLMode, + }) + if err != nil { + return fmt.Errorf("db connect: %w", err) + } + if err := db.Migrate(gormDB); err != nil { + return fmt.Errorf("db migrate: %w", err) + } + + repo := place.NewRepository(gormDB) + + f, err := os.Open(cfg.OSMPBFPath) + if err != nil { + return fmt.Errorf("open pbf: %w", err) + } + defer f.Close() //nolint:errcheck + + var ( + buffer []models.Place + processed int + written int + ) + + flush := func() error { + if len(buffer) == 0 { + return nil + } + if err := repo.UpsertBatch(ctx, buffer); err != nil { + return err + } + written += len(buffer) + buffer = buffer[:0] + return nil + } + + err = osm.StreamNodes(ctx, f, func(node osm.Node) error { + processed++ + if processed%10000 == 0 { + slog.Info("progress", "processed", processed, "written", written) + } + + category, ok := osm.Evaluate(node.Tags) + if !ok { + return nil + } + + p, err := osm.TransformNode(node.ID, node.Lat, node.Lng, node.Tags, category) + if err != nil { + slog.Warn("skipping node, transform error", "node_id", node.ID, "error", err) + return nil + } + + buffer = append(buffer, *p) + if len(buffer) >= batchSize { + return flush() + } + return nil + }) + if err != nil { + return fmt.Errorf("stream: %w", err) + } + + if err := flush(); err != nil { + return fmt.Errorf("final flush: %w", err) + } + + slog.Info("full-import complete", "processed", processed, "written", written) + return nil +} diff --git a/cmd/ingestion/main_integration_test.go b/cmd/ingestion/main_integration_test.go new file mode 100644 index 0000000..caff235 --- /dev/null +++ b/cmd/ingestion/main_integration_test.go @@ -0,0 +1,48 @@ +//go:build integration + +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package main + +import ( + "context" + "testing" + + "github.com/InWheelOrg/inwheel-api/internal/testhelpers" + "github.com/InWheelOrg/inwheel-api/pkg/models" +) + +func TestRunFullImport_AgainstFixturePBF(t *testing.T) { + ctx := context.Background() + db, connInfo, cleanup, err := testhelpers.StartPostgresWithConnInfo(ctx) + if err != nil { + t.Fatalf("start postgres: %v", err) + } + defer cleanup() + + cfg := config{ + DBHost: connInfo.Host, + DBPort: connInfo.Port, + DBUser: connInfo.User, + DBPassword: connInfo.Password, + DBName: connInfo.Name, + DBSSLMode: connInfo.SSLMode, + OSMPBFPath: "../../testdata/andorra-sample.osm.pbf", + } + + if err := runFullImport(ctx, cfg); err != nil { + t.Fatalf("runFullImport: %v", err) + } + + var count int64 + if err := db.Model(&models.Place{}).Count(&count).Error; err != nil { + t.Fatalf("count: %v", err) + } + t.Logf("imported %d places from the Andorra fixture", count) + if count == 0 { + t.Fatal("expected at least one place row, got zero") + } +} diff --git a/go.mod b/go.mod index 51d84be..6bb3ce8 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/jackc/pgx/v5 v5.9.2 github.com/oapi-codegen/nethttp-middleware v1.1.2 github.com/oapi-codegen/runtime v1.4.0 + github.com/paulmach/osm v0.9.0 github.com/testcontainers/testcontainers-go v0.42.0 github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0 golang.org/x/time v0.15.0 @@ -20,6 +21,7 @@ require ( require ( dario.cat/mergo v1.0.2 // indirect github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + github.com/DataDog/czlib v0.0.0-20240814115052-86a9592b3985 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect @@ -66,6 +68,8 @@ require ( github.com/oasdiff/yaml3 v0.0.12 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect + github.com/paulmach/orb v0.12.0 // indirect + github.com/paulmach/protoscan v0.2.1 // indirect github.com/perimeterx/marshmallow v1.1.5 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect @@ -77,6 +81,7 @@ require ( github.com/tklauser/numcpus v0.11.0 // indirect github.com/woodsbury/decimal128 v1.3.0 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect + go.mongodb.org/mongo-driver v1.17.4 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect go.opentelemetry.io/otel v1.41.0 // indirect @@ -88,4 +93,5 @@ require ( golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.42.0 // indirect golang.org/x/text v0.36.0 // indirect + google.golang.org/protobuf v1.36.10 // indirect ) diff --git a/go.sum b/go.sum index aad8048..15b324b 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,8 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8af github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/DataDog/czlib v0.0.0-20240814115052-86a9592b3985 h1:0nepyu+UcpcOt3rrr0G4PvNDuoEW2aoqtbh2NK0AQ3w= +github.com/DataDog/czlib v0.0.0-20240814115052-86a9592b3985/go.mod h1:ROY4muaTWpoeQAx/oUkvxe9zKCmgU5xDGXsfEbA+omc= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= @@ -27,6 +29,7 @@ github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHf github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dhui/dktest v0.4.6 h1:+DPKyScKSEp3VLtbMDHcUq6V5Lm5zfZZVb0Sk7Ahom4= @@ -64,6 +67,10 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA= github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -86,10 +93,16 @@ github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE= github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= @@ -122,6 +135,7 @@ github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/oapi-codegen/nethttp-middleware v1.1.2 h1:TQwEU3WM6ifc7ObBEtiJgbRPaCe513tvJpiMJjypVPA= @@ -136,6 +150,12 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= +github.com/paulmach/orb v0.12.0 h1:z+zOwjmG3MyEEqzv92UN49Lg1JFYx0L9GpGKNVDKk1s= +github.com/paulmach/orb v0.12.0/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU= +github.com/paulmach/osm v0.9.0 h1:hbfe9XSik+TECvwleEn3eUPZSPtlY6otd0MhbnB8aiw= +github.com/paulmach/osm v0.9.0/go.mod h1:L56sF1Rcd+IC36YkVjPr5FSVuid5sgpYUPgJZzmbSrs= +github.com/paulmach/protoscan v0.2.1 h1:rM0FpcTjUMvPUNk2BhPJrreDKetq43ChnL+x1sRg8O8= +github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY= github.com/perimeterx/marshmallow v1.1.5 h1:a2LALqQ1BlHM8PZblsDdidgv1mWi1DgC2UmX50IvK2s= github.com/perimeterx/marshmallow v1.1.5/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -158,6 +178,7 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.5.3 h1:jmXUvGomnU1o3W/V5h2VEradbpJDwGrzugQQvL0POH4= github.com/stretchr/objx v0.5.3/go.mod h1:rDQraq+vQZU7Fde9LOZLr8Tax6zZvy4kuNKF+QYS+U0= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= @@ -165,6 +186,7 @@ github.com/testcontainers/testcontainers-go v0.42.0 h1:He3IhTzTZOygSXLJPMX7n44Xt github.com/testcontainers/testcontainers-go v0.42.0/go.mod h1:vZjdY1YmUA1qEForxOIOazfsrdyORJAbhi0bp8plN30= github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0 h1:GCbb1ndrF7OTDiIvxXyItaDab4qkzTFJ48LKFdM7EIo= github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0/go.mod h1:IRPBaI8jXdrNfD0e4Zm7Fbcgaz5shKxOQv4axiL09xs= +github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA= github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI= github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw= @@ -173,8 +195,17 @@ github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65E github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/woodsbury/decimal128 v1.3.0 h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIjVWss0= github.com/woodsbury/decimal128 v1.3.0/go.mod h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= +github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= +github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g= +go.mongodb.org/mongo-driver v1.17.4 h1:jUorfmVzljjr0FLzYQsGP8cgN/qzzxlY9Vh0C9KFXVw= +go.mongodb.org/mongo-driver v1.17.4/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= @@ -189,23 +220,61 @@ go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2W go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= go.opentelemetry.io/otel/trace v1.41.0 h1:Vbk2co6bhj8L59ZJ6/xFTskY+tGAbOnCtQGVVa9TIN0= go.opentelemetry.io/otel/trace v1.41.0/go.mod h1:U1NU4ULCoxeDKc09yCWdWe+3QoyweJcISEVa1RBzOis= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U= golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/db/migrations/000007_add_places_osm_unique_index.down.sql b/internal/db/migrations/000007_add_places_osm_unique_index.down.sql new file mode 100644 index 0000000..440e956 --- /dev/null +++ b/internal/db/migrations/000007_add_places_osm_unique_index.down.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS places_osm_unique; diff --git a/internal/db/migrations/000007_add_places_osm_unique_index.up.sql b/internal/db/migrations/000007_add_places_osm_unique_index.up.sql new file mode 100644 index 0000000..7d59aa7 --- /dev/null +++ b/internal/db/migrations/000007_add_places_osm_unique_index.up.sql @@ -0,0 +1,3 @@ +CREATE UNIQUE INDEX IF NOT EXISTS places_osm_unique + ON places (osm_id, osm_type) + WHERE osm_id <> 0; diff --git a/internal/osm/filter.go b/internal/osm/filter.go new file mode 100644 index 0000000..7ffe9fe --- /dev/null +++ b/internal/osm/filter.go @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +// Package osm contains all OpenStreetMap-specific ingestion logic: +// PBF streaming, POI filtering, and transformation to models.Place. +package osm + +import "github.com/InWheelOrg/inwheel-api/pkg/models" + +// amenityToCategory maps OSM amenity tag values to our category taxonomy. +var amenityToCategory = map[string]models.Category{ + // Restaurants and food + "restaurant": models.CategoryRestaurant, + "fast_food": models.CategoryRestaurant, + "food_court": models.CategoryRestaurant, + "cafe": models.CategoryCafe, + "ice_cream": models.CategoryCafe, + "biergarten": models.CategoryCafe, + "bar": models.CategoryBar, + "pub": models.CategoryBar, + "nightclub": models.CategoryBar, + + // Healthcare + "hospital": models.CategoryHealthcare, + "clinic": models.CategoryHealthcare, + "doctors": models.CategoryHealthcare, + "dentist": models.CategoryHealthcare, + "pharmacy": models.CategoryHealthcare, + "veterinary": models.CategoryHealthcare, + "nursing_home": models.CategoryHealthcare, + + // Education + "school": models.CategoryEducation, + "university": models.CategoryEducation, + "college": models.CategoryEducation, + "kindergarten": models.CategoryEducation, + "childcare": models.CategoryEducation, + "library": models.CategoryEducation, + "language_school": models.CategoryEducation, + "music_school": models.CategoryEducation, + "driving_school": models.CategoryEducation, + "research_institute": models.CategoryEducation, + + // Finance + "bank": models.CategoryFinance, + "atm": models.CategoryFinance, + "bureau_de_change": models.CategoryFinance, + "money_transfer": models.CategoryFinance, + + // Entertainment + "cinema": models.CategoryEntertainment, + "theatre": models.CategoryEntertainment, + "arts_centre": models.CategoryEntertainment, + "casino": models.CategoryEntertainment, + "community_centre": models.CategoryEntertainment, + "events_venue": models.CategoryEntertainment, + "social_centre": models.CategoryEntertainment, + + // Government + "townhall": models.CategoryGovernment, + "courthouse": models.CategoryGovernment, + "embassy": models.CategoryGovernment, + "police": models.CategoryGovernment, + "fire_station": models.CategoryGovernment, + "post_office": models.CategoryGovernment, + + // Transport (amenity-tagged) + "bus_station": models.CategoryTransport, + "ferry_terminal": models.CategoryTransport, + "taxi": models.CategoryTransport, + "car_rental": models.CategoryTransport, + "bicycle_rental": models.CategoryTransport, + "car_sharing": models.CategoryTransport, + + // Social + "social_facility": models.CategorySocial, + "shelter": models.CategorySocial, + + // Worship + "place_of_worship": models.CategoryWorship, + + // Other (everyday utility POIs) + "toilets": models.CategoryToilet, + "marketplace": models.CategoryOther, + "internet_cafe": models.CategoryOther, +} + +// publicTransportAllowlist enumerates public_transport tag values worth importing as transport POIs. +var publicTransportAllowlist = map[string]struct{}{ + "station": {}, + "stop_area": {}, +} + +// buildingQualifyingKeys are the tag keys that, when present alongside building=*, qualify a building as a POI. +var buildingQualifyingKeys = map[string]struct{}{ + "amenity": {}, + "shop": {}, + "tourism": {}, + "leisure": {}, + "office": {}, + "healthcare": {}, + "public_transport": {}, +} + +// Evaluate decides whether an OSM element's tags qualify it as a POI we want to ingest. +// Returns the matched category and true on inclusion, or empty category and false on exclusion. +// +// Allowlist-based: unknown tags are excluded by default. Order matters because some elements +// carry multiple qualifying tags (e.g. amenity=hospital + building=yes) — amenity wins. +func Evaluate(tags map[string]string) (models.Category, bool) { + if cat, ok := amenityToCategory[tags["amenity"]]; ok { + return cat, true + } + + if pt, ok := tags["public_transport"]; ok { + if _, allowed := publicTransportAllowlist[pt]; allowed { + return models.CategoryTransport, true + } + } + + if shop := tags["shop"]; shop != "" { + return models.CategoryShop, true + } + + if tags["building"] != "" { + for key := range buildingQualifyingKeys { + if tags[key] != "" { + // A building qualified by some other tag; recurse on that key by re-evaluating. + // In practice the amenity / shop / public_transport branches above already caught + // most cases. This branch handles building=* + an unmapped qualifying tag — return + // CategoryOther so we don't lose buildings that look like POIs. + return models.CategoryOther, true + } + } + } + + return "", false +} diff --git a/internal/osm/filter_test.go b/internal/osm/filter_test.go new file mode 100644 index 0000000..dc24c19 --- /dev/null +++ b/internal/osm/filter_test.go @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package osm + +import ( + "testing" + + "github.com/InWheelOrg/inwheel-api/pkg/models" +) + +func TestEvaluate(t *testing.T) { + cases := []struct { + name string + tags map[string]string + want models.Category + included bool + }{ + {"restaurant", map[string]string{"amenity": "restaurant"}, models.CategoryRestaurant, true}, + {"cafe", map[string]string{"amenity": "cafe"}, models.CategoryCafe, true}, + {"bar", map[string]string{"amenity": "bar"}, models.CategoryBar, true}, + {"pub maps to bar", map[string]string{"amenity": "pub"}, models.CategoryBar, true}, + {"hospital", map[string]string{"amenity": "hospital"}, models.CategoryHealthcare, true}, + {"school", map[string]string{"amenity": "school"}, models.CategoryEducation, true}, + {"atm", map[string]string{"amenity": "atm"}, models.CategoryFinance, true}, + {"cinema", map[string]string{"amenity": "cinema"}, models.CategoryEntertainment, true}, + {"townhall", map[string]string{"amenity": "townhall"}, models.CategoryGovernment, true}, + {"bus_station", map[string]string{"amenity": "bus_station"}, models.CategoryTransport, true}, + {"social_facility", map[string]string{"amenity": "social_facility"}, models.CategorySocial, true}, + {"toilets", map[string]string{"amenity": "toilets"}, models.CategoryToilet, true}, + {"place_of_worship", map[string]string{"amenity": "place_of_worship"}, models.CategoryWorship, true}, + {"public_transport station", map[string]string{"public_transport": "station"}, models.CategoryTransport, true}, + {"shop generic", map[string]string{"shop": "supermarket"}, models.CategoryShop, true}, + {"building with amenity", map[string]string{"building": "yes", "amenity": "hospital"}, models.CategoryHealthcare, true}, + {"building without qualifying tag", map[string]string{"building": "yes"}, "", false}, + {"highway excluded", map[string]string{"highway": "residential"}, "", false}, + {"natural excluded", map[string]string{"natural": "peak"}, "", false}, + {"empty tags excluded", map[string]string{}, "", false}, + {"unknown amenity excluded", map[string]string{"amenity": "bench"}, "", false}, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + cat, ok := Evaluate(c.tags) + if ok != c.included { + t.Fatalf("inclusion mismatch: got %v want %v", ok, c.included) + } + if cat != c.want { + t.Fatalf("category mismatch: got %q want %q", cat, c.want) + } + }) + } +} diff --git a/internal/osm/pbf.go b/internal/osm/pbf.go new file mode 100644 index 0000000..35fd76d --- /dev/null +++ b/internal/osm/pbf.go @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package osm + +import ( + "context" + "fmt" + "io" + + pmosm "github.com/paulmach/osm" + "github.com/paulmach/osm/osmpbf" +) + +// Node is a domain representation of an OSM node, decoupled from the parser library. +type Node struct { + ID int64 + Lat float64 + Lng float64 + Tags map[string]string +} + +// NodeSink is the callback invoked for each node read from a PBF stream. +// Returning a non-nil error stops the stream and propagates the error to StreamNodes' caller. +type NodeSink func(Node) error + +// StreamNodes reads a .osm.pbf stream and invokes sink for each node element. +// Ways and relations are skipped silently. +func StreamNodes(ctx context.Context, r io.Reader, sink NodeSink) error { + scanner := osmpbf.New(ctx, r, 1) + defer scanner.Close() //nolint:errcheck + + for scanner.Scan() { + obj := scanner.Object() + node, ok := obj.(*pmosm.Node) + if !ok { + continue // skip ways, relations + } + + domainNode := Node{ + ID: int64(node.ID), + Lat: node.Lat, + Lng: node.Lon, + Tags: node.Tags.Map(), + } + + if err := sink(domainNode); err != nil { + return fmt.Errorf("node %d: %w", domainNode.ID, err) + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("pbf scan: %w", err) + } + return nil +} diff --git a/internal/osm/pbf_test.go b/internal/osm/pbf_test.go new file mode 100644 index 0000000..18e7191 --- /dev/null +++ b/internal/osm/pbf_test.go @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package osm + +import ( + "context" + "errors" + "os" + "testing" +) + +func TestStreamNodes_FixturePBF(t *testing.T) { + f, err := os.Open("../../testdata/andorra-sample.osm.pbf") + if err != nil { + t.Skipf("fixture PBF not available: %v", err) + } + defer f.Close() //nolint:errcheck + + ctx := context.Background() + var totalNodes, includedPois int + err = StreamNodes(ctx, f, func(node Node) error { + totalNodes++ + if _, ok := Evaluate(node.Tags); ok { + includedPois++ + } + return nil + }) + if err != nil { + t.Fatalf("StreamNodes returned an error: %v", err) + } + + if totalNodes == 0 { + t.Fatal("expected at least one node from the fixture PBF, got zero") + } + t.Logf("streamed %d total nodes, %d qualifying POIs", totalNodes, includedPois) +} + +func TestStreamNodes_StopsOnSinkError(t *testing.T) { + f, err := os.Open("../../testdata/andorra-sample.osm.pbf") + if err != nil { + t.Skipf("fixture PBF not available: %v", err) + } + defer f.Close() //nolint:errcheck + + ctx := context.Background() + sentinel := errors.New("stop") + count := 0 + err = StreamNodes(ctx, f, func(node Node) error { + count++ + return sentinel + }) + if err == nil { + t.Fatal("expected an error, got nil") + } + if count != 1 { + t.Errorf("expected stream to stop after first node, got %d nodes", count) + } +} diff --git a/internal/osm/rank.go b/internal/osm/rank.go new file mode 100644 index 0000000..c44bf2a --- /dev/null +++ b/internal/osm/rank.go @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package osm + +import "github.com/InWheelOrg/inwheel-api/pkg/models" + +// landmarkTransportAmenity identifies major transport hubs that should rank as landmarks. +var landmarkTransportAmenity = map[string]struct{}{ + "bus_station": {}, + "ferry_terminal": {}, +} + +// landmarkTransportPublic identifies major public_transport values that should rank as landmarks. +var landmarkTransportPublic = map[string]struct{}{ + "station": {}, +} + +// featureAmenity identifies amenity values that are minor utilities rather than primary establishments. +var featureAmenity = map[string]struct{}{ + "toilets": {}, + "atm": {}, + "shelter": {}, +} + +// DeriveRank picks a zoom-level priority for a place based on its category and OSM tags. +// Returns RankLandmark for major hubs, RankFeature for minor utilities, RankEstablishment otherwise. +func DeriveRank(category models.Category, tags map[string]string) models.Rank { + amenity := tags["amenity"] + publicTransport := tags["public_transport"] + + switch category { + case models.CategoryHealthcare: + if amenity == "hospital" { + return models.RankLandmark + } + case models.CategoryEducation: + if amenity == "university" { + return models.RankLandmark + } + case models.CategoryTransport: + if _, ok := landmarkTransportAmenity[amenity]; ok { + return models.RankLandmark + } + if _, ok := landmarkTransportPublic[publicTransport]; ok { + return models.RankLandmark + } + case models.CategoryToilet: + return models.RankFeature + case models.CategoryFinance, models.CategorySocial: + if _, ok := featureAmenity[amenity]; ok { + return models.RankFeature + } + } + + return models.RankEstablishment +} diff --git a/internal/osm/transformer.go b/internal/osm/transformer.go new file mode 100644 index 0000000..5f2a956 --- /dev/null +++ b/internal/osm/transformer.go @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package osm + +import ( + "fmt" + + "github.com/InWheelOrg/inwheel-api/pkg/models" +) + +// TransformNode converts a filtered OSM node into a models.Place ready for upsert. +// The category must come from a prior call to Evaluate. +func TransformNode(osmID int64, lat, lng float64, tags map[string]string, category models.Category) (*models.Place, error) { + if category == "" { + return nil, fmt.Errorf("transform: category is empty for node %d", osmID) + } + + placeTags := make(models.PlaceTags, len(tags)) + for k, v := range tags { + placeTags[k] = v + } + + return &models.Place{ + OSMID: osmID, + OSMType: models.OSMNode, + Name: tags["name"], + Lat: lat, + Lng: lng, + Category: category, + Rank: DeriveRank(category, tags), + Tags: placeTags, + ExternalIDs: models.ExternalIDs{ + "osm": fmt.Sprintf("node/%d", osmID), + }, + Source: "osm", + Status: models.PlaceStatusActive, + }, nil +} diff --git a/internal/osm/transformer_test.go b/internal/osm/transformer_test.go new file mode 100644 index 0000000..c1a69ee --- /dev/null +++ b/internal/osm/transformer_test.go @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package osm + +import ( + "testing" + + "github.com/InWheelOrg/inwheel-api/pkg/models" +) + +func TestTransformNode(t *testing.T) { + tags := map[string]string{ + "amenity": "restaurant", + "name": "Ravintola Tor", + } + + place, err := TransformNode(123, 60.1699, 24.9384, tags, models.CategoryRestaurant) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if place.OSMID != 123 { + t.Errorf("osm_id: got %d want 123", place.OSMID) + } + if place.OSMType != models.OSMNode { + t.Errorf("osm_type: got %q want %q", place.OSMType, models.OSMNode) + } + if place.Name != "Ravintola Tor" { + t.Errorf("name: got %q", place.Name) + } + if place.Lat != 60.1699 || place.Lng != 24.9384 { + t.Errorf("coords: got (%v, %v)", place.Lat, place.Lng) + } + if place.Category != models.CategoryRestaurant { + t.Errorf("category: got %q", place.Category) + } + if place.Rank != models.RankEstablishment { + t.Errorf("rank: got %d want %d", place.Rank, models.RankEstablishment) + } + if place.ExternalIDs["osm"] != "node/123" { + t.Errorf("external_ids[osm]: got %q want node/123", place.ExternalIDs["osm"]) + } + if place.Source != "osm" { + t.Errorf("source: got %q want osm", place.Source) + } + if place.Status != models.PlaceStatusActive { + t.Errorf("status: got %q want active", place.Status) + } +} + +func TestTransformNode_PreservesTags(t *testing.T) { + tags := map[string]string{ + "amenity": "restaurant", + "name": "Burger Place", + "addr:city": "Helsinki", + } + + place, err := TransformNode(1, 60, 24, tags, models.CategoryRestaurant) + if err != nil { + t.Fatal(err) + } + + for k, v := range tags { + if got := place.Tags[k]; got != v { + t.Errorf("tag %q: got %q want %q", k, got, v) + } + } +} + +func TestTransformNode_EmptyCategoryReturnsError(t *testing.T) { + _, err := TransformNode(1, 60, 24, map[string]string{"amenity": "restaurant"}, "") + if err == nil { + t.Fatal("expected error for empty category, got nil") + } +} + +func TestDeriveRank(t *testing.T) { + cases := []struct { + name string + category models.Category + tags map[string]string + want models.Rank + }{ + {"hospital is landmark", models.CategoryHealthcare, map[string]string{"amenity": "hospital"}, models.RankLandmark}, + {"clinic is establishment", models.CategoryHealthcare, map[string]string{"amenity": "clinic"}, models.RankEstablishment}, + {"university is landmark", models.CategoryEducation, map[string]string{"amenity": "university"}, models.RankLandmark}, + {"school is establishment", models.CategoryEducation, map[string]string{"amenity": "school"}, models.RankEstablishment}, + {"bus_station is landmark", models.CategoryTransport, map[string]string{"amenity": "bus_station"}, models.RankLandmark}, + {"ferry_terminal is landmark", models.CategoryTransport, map[string]string{"amenity": "ferry_terminal"}, models.RankLandmark}, + {"public_transport station is landmark", models.CategoryTransport, map[string]string{"public_transport": "station"}, models.RankLandmark}, + {"taxi is establishment", models.CategoryTransport, map[string]string{"amenity": "taxi"}, models.RankEstablishment}, + {"toilet is feature", models.CategoryToilet, map[string]string{"amenity": "toilets"}, models.RankFeature}, + {"atm is feature", models.CategoryFinance, map[string]string{"amenity": "atm"}, models.RankFeature}, + {"bank is establishment", models.CategoryFinance, map[string]string{"amenity": "bank"}, models.RankEstablishment}, + {"shelter is feature", models.CategorySocial, map[string]string{"amenity": "shelter"}, models.RankFeature}, + {"restaurant is establishment", models.CategoryRestaurant, map[string]string{"amenity": "restaurant"}, models.RankEstablishment}, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got := DeriveRank(c.category, c.tags) + if got != c.want { + t.Fatalf("rank: got %d want %d", got, c.want) + } + }) + } +} diff --git a/internal/place/repository.go b/internal/place/repository.go new file mode 100644 index 0000000..e6cf8b7 --- /dev/null +++ b/internal/place/repository.go @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +// Package place owns reads and writes against the places table. +package place + +import ( + "context" + "fmt" + + "gorm.io/gorm" + "gorm.io/gorm/clause" + + "github.com/InWheelOrg/inwheel-api/pkg/models" +) + +// Repository is the data-access layer for places. +type Repository struct { + db *gorm.DB +} + +// NewRepository constructs a Repository backed by the given GORM connection. +func NewRepository(db *gorm.DB) *Repository { + return &Repository{db: db} +} + +// UpsertBatch inserts or updates the given places in a single SQL statement using +// (osm_id, osm_type) as the conflict key. Existing rows have their name, location, +// category, rank, tags, external_ids, and status replaced. Returns an error if the +// underlying SQL fails. An empty or nil batch is a no-op. +func (r *Repository) UpsertBatch(ctx context.Context, places []models.Place) error { + if len(places) == 0 { + return nil + } + + // TargetWhere matches the partial index predicate. The index is defined + // WHERE osm_id <> 0 so test fixtures that create places without setting + // osm_id (zero value) don't collide on the unique constraint. In production, + // every place is OSM-sourced and has a non-zero osm_id, so the predicate + // covers every real row. + tx := r.db.WithContext(ctx).Clauses(clause.OnConflict{ + Columns: []clause.Column{ + {Name: "osm_id"}, + {Name: "osm_type"}, + }, + TargetWhere: clause.Where{Exprs: []clause.Expression{ + clause.Expr{SQL: "osm_id <> 0"}, + }}, + DoUpdates: clause.AssignmentColumns([]string{ + "name", "lat", "lng", "category", "rank", "tags", "external_ids", "status", "updated_at", + }), + }).Create(&places) + + if tx.Error != nil { + return fmt.Errorf("upsert places: %w", tx.Error) + } + return nil +} diff --git a/internal/place/repository_integration_test.go b/internal/place/repository_integration_test.go new file mode 100644 index 0000000..c2e085d --- /dev/null +++ b/internal/place/repository_integration_test.go @@ -0,0 +1,92 @@ +//go:build integration + +/* + * Copyright (C) 2026 InWheel Contributors + * SPDX-License-Identifier: AGPL-3.0-only + */ + +package place_test + +import ( + "context" + "testing" + + "github.com/InWheelOrg/inwheel-api/internal/place" + "github.com/InWheelOrg/inwheel-api/internal/testhelpers" + "github.com/InWheelOrg/inwheel-api/pkg/models" +) + +func TestUpsertBatch_InsertsNewPlaces(t *testing.T) { + ctx := context.Background() + db, cleanup, err := testhelpers.StartPostgres(ctx) + if err != nil { + t.Fatalf("start postgres: %v", err) + } + defer cleanup() + + repo := place.NewRepository(db) + + places := []models.Place{ + {OSMID: 1, OSMType: models.OSMNode, Name: "A", Lat: 60, Lng: 24, Category: models.CategoryRestaurant, Rank: models.RankEstablishment, Source: "osm", Status: models.PlaceStatusActive, ExternalIDs: models.ExternalIDs{"osm": "node/1"}}, + {OSMID: 2, OSMType: models.OSMNode, Name: "B", Lat: 61, Lng: 25, Category: models.CategoryCafe, Rank: models.RankEstablishment, Source: "osm", Status: models.PlaceStatusActive, ExternalIDs: models.ExternalIDs{"osm": "node/2"}}, + } + + if err := repo.UpsertBatch(ctx, places); err != nil { + t.Fatalf("upsert batch: %v", err) + } + + var count int64 + db.Model(&models.Place{}).Count(&count) + if count != 2 { + t.Fatalf("expected 2 rows, got %d", count) + } +} + +func TestUpsertBatch_UpdatesExistingPlace(t *testing.T) { + ctx := context.Background() + db, cleanup, err := testhelpers.StartPostgres(ctx) + if err != nil { + t.Fatalf("start postgres: %v", err) + } + defer cleanup() + + repo := place.NewRepository(db) + + first := []models.Place{{OSMID: 42, OSMType: models.OSMNode, Name: "Original", Lat: 60, Lng: 24, Category: models.CategoryRestaurant, Rank: models.RankEstablishment, Source: "osm", Status: models.PlaceStatusActive, ExternalIDs: models.ExternalIDs{"osm": "node/42"}}} + if err := repo.UpsertBatch(ctx, first); err != nil { + t.Fatalf("first upsert: %v", err) + } + + second := []models.Place{{OSMID: 42, OSMType: models.OSMNode, Name: "Renamed", Lat: 60.5, Lng: 24.5, Category: models.CategoryRestaurant, Rank: models.RankEstablishment, Source: "osm", Status: models.PlaceStatusActive, ExternalIDs: models.ExternalIDs{"osm": "node/42"}}} + if err := repo.UpsertBatch(ctx, second); err != nil { + t.Fatalf("second upsert: %v", err) + } + + var got models.Place + if err := db.Where("osm_id = ?", 42).First(&got).Error; err != nil { + t.Fatalf("fetch updated row: %v", err) + } + if got.Name != "Renamed" { + t.Errorf("name was not updated: got %q", got.Name) + } + if got.Lat != 60.5 || got.Lng != 24.5 { + t.Errorf("coords were not updated: got (%v, %v)", got.Lat, got.Lng) + } +} + +func TestUpsertBatch_EmptySliceIsNoOp(t *testing.T) { + ctx := context.Background() + db, cleanup, err := testhelpers.StartPostgres(ctx) + if err != nil { + t.Fatalf("start postgres: %v", err) + } + defer cleanup() + + repo := place.NewRepository(db) + if err := repo.UpsertBatch(ctx, nil); err != nil { + t.Fatalf("nil batch should be a no-op, got error: %v", err) + } + if err := repo.UpsertBatch(ctx, []models.Place{}); err != nil { + t.Fatalf("empty batch should be a no-op, got error: %v", err) + } +} diff --git a/internal/testhelpers/db.go b/internal/testhelpers/db.go index b6084d7..879d59c 100644 --- a/internal/testhelpers/db.go +++ b/internal/testhelpers/db.go @@ -19,9 +19,33 @@ import ( "gorm.io/gorm" ) +// ConnInfo holds the raw connection parameters for a test container. +// Use it when you need to open a second, independent database connection +// (e.g. to test code paths that manage their own connection lifecycle). +type ConnInfo struct { + Host string + Port int + User string + Password string + Name string + SSLMode string +} + // StartPostgres starts a PostGIS container, runs migrations, and returns a connected *gorm.DB. // The returned cleanup function terminates the container and must be called when tests are done. func StartPostgres(ctx context.Context) (*gorm.DB, func(), error) { + db, _, cleanup, err := startPostgresInner(ctx) + return db, cleanup, err +} + +// StartPostgresWithConnInfo is like StartPostgres but additionally returns the raw +// connection parameters so callers can open their own independent connections. +func StartPostgresWithConnInfo(ctx context.Context) (*gorm.DB, ConnInfo, func(), error) { + return startPostgresInner(ctx) +} + +// startPostgresInner is the shared implementation. +func startPostgresInner(ctx context.Context) (*gorm.DB, ConnInfo, func(), error) { container, err := tcpostgres.Run(ctx, "postgis/postgis:18-3.6", tcpostgres.WithDatabase("inwheel_test"), tcpostgres.WithUsername("test"), @@ -33,7 +57,7 @@ func StartPostgres(ctx context.Context) (*gorm.DB, func(), error) { ), ) if err != nil { - return nil, nil, fmt.Errorf("start postgres container: %w", err) + return nil, ConnInfo{}, nil, fmt.Errorf("start postgres container: %w", err) } cleanup := func() { @@ -43,32 +67,41 @@ func StartPostgres(ctx context.Context) (*gorm.DB, func(), error) { host, err := container.Host(ctx) if err != nil { cleanup() - return nil, nil, fmt.Errorf("get container host: %w", err) + return nil, ConnInfo{}, nil, fmt.Errorf("get container host: %w", err) } port, err := container.MappedPort(ctx, "5432/tcp") if err != nil { cleanup() - return nil, nil, fmt.Errorf("get container port: %w", err) + return nil, ConnInfo{}, nil, fmt.Errorf("get container port: %w", err) } - gormDB, err := internaldb.Connect(internaldb.Config{ + info := ConnInfo{ Host: host, Port: int(port.Num()), User: "test", Password: "test", Name: "inwheel_test", SSLMode: "disable", + } + + gormDB, err := internaldb.Connect(internaldb.Config{ + Host: info.Host, + Port: info.Port, + User: info.User, + Password: info.Password, + Name: info.Name, + SSLMode: info.SSLMode, }) if err != nil { cleanup() - return nil, nil, fmt.Errorf("connect to test db: %w", err) + return nil, ConnInfo{}, nil, fmt.Errorf("connect to test db: %w", err) } if err := internaldb.Migrate(gormDB); err != nil { cleanup() - return nil, nil, fmt.Errorf("migrate test db: %w", err) + return nil, ConnInfo{}, nil, fmt.Errorf("migrate test db: %w", err) } - return gormDB, cleanup, nil + return gormDB, info, cleanup, nil } diff --git a/pkg/models/place.go b/pkg/models/place.go index 3585bd4..c47359c 100644 --- a/pkg/models/place.go +++ b/pkg/models/place.go @@ -40,16 +40,25 @@ const ( type Category string const ( - CategoryMall Category = "mall" - CategoryAirport Category = "airport" - CategoryTrainStation Category = "train_station" - CategoryRestaurant Category = "restaurant" - CategoryCafe Category = "cafe" - CategoryShop Category = "shop" - CategoryToilet Category = "toilet" - CategoryParking Category = "parking" - CategoryEntrance Category = "entrance" - CategoryOther Category = "other" + CategoryMall Category = "mall" + CategoryAirport Category = "airport" + CategoryTrainStation Category = "train_station" + CategoryRestaurant Category = "restaurant" + CategoryCafe Category = "cafe" + CategoryBar Category = "bar" + CategoryShop Category = "shop" + CategoryHealthcare Category = "healthcare" + CategoryEducation Category = "education" + CategoryFinance Category = "finance" + CategoryEntertainment Category = "entertainment" + CategoryGovernment Category = "government" + CategoryTransport Category = "transport" + CategorySocial Category = "social" + CategoryWorship Category = "worship" + CategoryToilet Category = "toilet" + CategoryParking Category = "parking" + CategoryEntrance Category = "entrance" + CategoryOther Category = "other" ) // OSMType represents the OpenStreetMap data type. diff --git a/testdata/andorra-sample.osm.pbf b/testdata/andorra-sample.osm.pbf new file mode 100644 index 0000000..ada5325 Binary files /dev/null and b/testdata/andorra-sample.osm.pbf differ