From 147f1ea2d919983d7d9e056056c9399034d60899 Mon Sep 17 00:00:00 2001 From: Naved Date: Tue, 26 May 2026 08:29:10 -0700 Subject: [PATCH 01/11] feat(code-index): add Semble as a local on-the-fly embedding provider --- packages/types/src/codebase-index.ts | 4 + packages/types/src/embedding.ts | 3 +- packages/types/src/vscode-extension-host.ts | 2 + src/core/webview/ClineProvider.ts | 2 + src/core/webview/webviewMessageHandler.ts | 1 + src/services/code-index/config-manager.ts | 27 ++ src/services/code-index/interfaces/config.ts | 1 + src/services/code-index/interfaces/manager.ts | 1 + src/services/code-index/manager.ts | 73 +++- .../semble/__tests__/provider.spec.ts | 330 ++++++++++++++++++ .../semble/__tests__/semble-cli.spec.ts | 322 +++++++++++++++++ src/services/code-index/semble/index.ts | 4 + src/services/code-index/semble/provider.ts | 203 +++++++++++ src/services/code-index/semble/semble-cli.ts | 257 ++++++++++++++ src/services/code-index/semble/types.ts | 90 +++++ src/services/code-index/service-factory.ts | 13 + src/shared/embeddingModels.ts | 6 + .../src/components/chat/CodeIndexPopover.tsx | 149 +++++--- webview-ui/src/i18n/locales/en/settings.json | 9 + 19 files changed, 1443 insertions(+), 54 deletions(-) create mode 100644 src/services/code-index/semble/__tests__/provider.spec.ts create mode 100644 src/services/code-index/semble/__tests__/semble-cli.spec.ts create mode 100644 src/services/code-index/semble/index.ts create mode 100644 src/services/code-index/semble/provider.ts create mode 100644 src/services/code-index/semble/semble-cli.ts create mode 100644 src/services/code-index/semble/types.ts diff --git a/packages/types/src/codebase-index.ts b/packages/types/src/codebase-index.ts index 61009ba301..ff6822d79a 100644 --- a/packages/types/src/codebase-index.ts +++ b/packages/types/src/codebase-index.ts @@ -31,6 +31,7 @@ export const codebaseIndexConfigSchema = z.object({ "vercel-ai-gateway", "bedrock", "openrouter", + "semble", ]) .optional(), 
codebaseIndexEmbedderBaseUrl: z.string().optional(), @@ -50,6 +51,8 @@ export const codebaseIndexConfigSchema = z.object({ codebaseIndexBedrockProfile: z.string().optional(), // OpenRouter specific fields codebaseIndexOpenRouterSpecificProvider: z.string().optional(), + // Semble specific fields + codebaseIndexSemblePath: z.string().optional(), }) export type CodebaseIndexConfig = z.infer @@ -67,6 +70,7 @@ export const codebaseIndexModelsSchema = z.object({ "vercel-ai-gateway": z.record(z.string(), z.object({ dimension: z.number() })).optional(), openrouter: z.record(z.string(), z.object({ dimension: z.number() })).optional(), bedrock: z.record(z.string(), z.object({ dimension: z.number() })).optional(), + semble: z.record(z.string(), z.object({ dimension: z.number() })).optional(), }) export type CodebaseIndexModels = z.infer diff --git a/packages/types/src/embedding.ts b/packages/types/src/embedding.ts index 1c5a92e1ac..f0597d96c6 100644 --- a/packages/types/src/embedding.ts +++ b/packages/types/src/embedding.ts @@ -6,7 +6,8 @@ export type EmbedderProvider = | "mistral" | "vercel-ai-gateway" | "bedrock" - | "openrouter" // Add other providers as needed. + | "openrouter" + | "semble" // Local hybrid search via semble CLI — no API keys or Qdrant required. 
export interface EmbeddingModelProfile { dimension: number diff --git a/packages/types/src/vscode-extension-host.ts b/packages/types/src/vscode-extension-host.ts index c09f22aed7..92a0c6d4c5 100644 --- a/packages/types/src/vscode-extension-host.ts +++ b/packages/types/src/vscode-extension-host.ts @@ -661,6 +661,7 @@ export interface WebviewMessage { | "vercel-ai-gateway" | "bedrock" | "openrouter" + | "semble" codebaseIndexEmbedderBaseUrl?: string codebaseIndexEmbedderModelId: string codebaseIndexEmbedderModelDimension?: number // Generic dimension for all providers @@ -670,6 +671,7 @@ export interface WebviewMessage { codebaseIndexSearchMaxResults?: number codebaseIndexSearchMinScore?: number codebaseIndexOpenRouterSpecificProvider?: string // OpenRouter provider routing + codebaseIndexSemblePath?: string // Path to the semble executable // Secret settings codeIndexOpenAiKey?: string diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 3f5af94cae..40590056d0 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -2253,6 +2253,7 @@ export class ClineProvider codebaseIndexBedrockRegion: codebaseIndexConfig?.codebaseIndexBedrockRegion, codebaseIndexBedrockProfile: codebaseIndexConfig?.codebaseIndexBedrockProfile, codebaseIndexOpenRouterSpecificProvider: codebaseIndexConfig?.codebaseIndexOpenRouterSpecificProvider, + codebaseIndexSemblePath: codebaseIndexConfig?.codebaseIndexSemblePath, }, // Phase 1 cloud removal: do not let Cloud-auth MDM enforcement force login-only UI flows. mdmCompliant: undefined, @@ -2455,6 +2456,7 @@ export class ClineProvider codebaseIndexBedrockProfile: stateValues.codebaseIndexConfig?.codebaseIndexBedrockProfile, codebaseIndexOpenRouterSpecificProvider: stateValues.codebaseIndexConfig?.codebaseIndexOpenRouterSpecificProvider, + codebaseIndexSemblePath: stateValues.codebaseIndexConfig?.codebaseIndexSemblePath, }, profileThresholds: stateValues.profileThresholds ?? 
{}, lockApiConfigAcrossModes: this.context.workspaceState.get("lockApiConfigAcrossModes", false), diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index 429de051b8..a13595e7d9 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -2500,6 +2500,7 @@ export const webviewMessageHandler = async ( codebaseIndexSearchMaxResults: settings.codebaseIndexSearchMaxResults, codebaseIndexSearchMinScore: settings.codebaseIndexSearchMinScore, codebaseIndexOpenRouterSpecificProvider: settings.codebaseIndexOpenRouterSpecificProvider, + codebaseIndexSemblePath: settings.codebaseIndexSemblePath, } // Save global state first diff --git a/src/services/code-index/config-manager.ts b/src/services/code-index/config-manager.ts index e7f239e621..9984ca4dc4 100644 --- a/src/services/code-index/config-manager.ts +++ b/src/services/code-index/config-manager.ts @@ -22,6 +22,7 @@ export class CodeIndexConfigManager { private vercelAiGatewayOptions?: { apiKey: string } private bedrockOptions?: { region: string; profile?: string } private openRouterOptions?: { apiKey: string; specificProvider?: string } + private semblePath?: string private qdrantUrl?: string = "http://localhost:6333" private qdrantApiKey?: string private searchMinScore?: number @@ -120,6 +121,8 @@ export class CodeIndexConfigManager { this.embedderProvider = "bedrock" } else if (codebaseIndexEmbedderProvider === "openrouter") { this.embedderProvider = "openrouter" + } else if (codebaseIndexEmbedderProvider === "semble") { + this.embedderProvider = "semble" } else { this.embedderProvider = "openai" } @@ -148,6 +151,9 @@ export class CodeIndexConfigManager { this.bedrockOptions = bedrockRegion ? 
{ region: bedrockRegion, profile: bedrockProfile || undefined } : undefined + + // Semble path (optional — defaults to "semble" in the SembleProvider) + this.semblePath = codebaseIndexConfig.codebaseIndexSemblePath || undefined } /** @@ -194,6 +200,7 @@ export class CodeIndexConfigManager { openRouterSpecificProvider: this.openRouterOptions?.specificProvider ?? "", qdrantUrl: this.qdrantUrl ?? "", qdrantApiKey: this.qdrantApiKey ?? "", + semblePath: this.semblePath ?? "", } // Refresh secrets from VSCode storage to ensure we have the latest values @@ -231,6 +238,11 @@ export class CodeIndexConfigManager { * Checks if the service is properly configured based on the embedder type. */ public isConfigured(): boolean { + if (this.embedderProvider === "semble") { + // Semble requires no API keys or Qdrant — it's always configured + return true + } + if (this.embedderProvider === "openai") { const openAiKey = this.openAiOptions?.openAiNativeApiKey const qdrantUrl = this.qdrantUrl @@ -405,6 +417,13 @@ export class CodeIndexConfigManager { return true } + // Semble path change requires restart to use the new executable + const prevSemblePath = prev?.semblePath ?? "" + const currentSemblePath = this.semblePath ?? "" + if (prevSemblePath !== currentSemblePath) { + return true + } + // Vector dimension changes (still important for compatibility) if (this._hasVectorDimensionChanged(prevProvider, prev?.modelId)) { return true @@ -541,4 +560,12 @@ export class CodeIndexConfigManager { public get currentSearchMaxResults(): number { return this.searchMaxResults ?? DEFAULT_MAX_SEARCH_RESULTS } + + /** + * Gets the configured path to the semble executable. + * Returns undefined if not explicitly configured (provider will use default "semble"). 
+ */ + public get currentSemblePath(): string | undefined { + return this.semblePath + } } diff --git a/src/services/code-index/interfaces/config.ts b/src/services/code-index/interfaces/config.ts index f52f98aaa0..e63f7a5c4d 100644 --- a/src/services/code-index/interfaces/config.ts +++ b/src/services/code-index/interfaces/config.ts @@ -45,4 +45,5 @@ export type PreviousConfigSnapshot = { openRouterSpecificProvider?: string qdrantUrl?: string qdrantApiKey?: string + semblePath?: string } diff --git a/src/services/code-index/interfaces/manager.ts b/src/services/code-index/interfaces/manager.ts index d657ad667c..cdda7a7053 100644 --- a/src/services/code-index/interfaces/manager.ts +++ b/src/services/code-index/interfaces/manager.ts @@ -84,6 +84,7 @@ export type EmbedderProvider = | "vercel-ai-gateway" | "bedrock" | "openrouter" + | "semble" export interface IndexProgressUpdate { systemStatus: IndexingState diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index 91ea515e40..3146d4f05e 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -8,6 +8,7 @@ import { CodeIndexServiceFactory } from "./service-factory" import { CodeIndexSearchService } from "./search-service" import { CodeIndexOrchestrator } from "./orchestrator" import { CacheManager } from "./cache-manager" +import { SembleProvider } from "./semble" import { RooIgnoreController } from "../../core/ignore/RooIgnoreController" import fs from "fs/promises" import ignore from "ignore" @@ -27,6 +28,7 @@ export class CodeIndexManager { private _orchestrator: CodeIndexOrchestrator | undefined private _searchService: CodeIndexSearchService | undefined private _cacheManager: CacheManager | undefined + private _sembleProvider: SembleProvider | undefined // Flag to prevent race conditions during error recovery private _isRecoveringFromError = false @@ -125,6 +127,10 @@ export class CodeIndexManager { } private assertInitialized() { + if 
(this._sembleProvider) { + // When semble is active, we don't need orchestrator/searchService + return + } if (!this._configManager || !this._orchestrator || !this._searchService || !this._cacheManager) { throw new Error("CodeIndexManager not initialized. Call initialize() first.") } @@ -134,6 +140,9 @@ export class CodeIndexManager { if (!this.isFeatureEnabled) { return "Standby" } + if (this._sembleProvider) { + return this._sembleProvider.state + } this.assertInitialized() return this._orchestrator!.state } @@ -173,6 +182,9 @@ export class CodeIndexManager { if (this._orchestrator) { this._orchestrator.stopWatcher() } + if (this._sembleProvider) { + this._sembleProvider.stopIndexing() + } return { requiresRestart } } @@ -196,19 +208,27 @@ export class CodeIndexManager { } // 6. Determine if Core Services Need Recreation - const needsServiceRecreation = !this._serviceFactory || requiresRestart + const needsServiceRecreation = (!this._serviceFactory && !this._sembleProvider) || requiresRestart if (needsServiceRecreation) { await this._recreateServices() } // 7. 
Handle Indexing Start/Restart - const shouldStartOrRestartIndexing = - requiresRestart || - (needsServiceRecreation && (!this._orchestrator || this._orchestrator.state !== "Indexing")) + if (this._sembleProvider) { + // For semble, start indexing if needed + const shouldStartIndexing = requiresRestart || needsServiceRecreation + if (shouldStartIndexing) { + await this._sembleProvider.startIndexing() + } + } else { + const shouldStartOrRestartIndexing = + requiresRestart || + (needsServiceRecreation && (!this._orchestrator || this._orchestrator.state !== "Indexing")) - if (shouldStartOrRestartIndexing) { - this._orchestrator?.startIndexing() + if (shouldStartOrRestartIndexing) { + this._orchestrator?.startIndexing() + } } return { requiresRestart } @@ -226,6 +246,12 @@ export class CodeIndexManager { return } + // Delegate to semble provider if active + if (this._sembleProvider) { + await this._sembleProvider.startIndexing() + return + } + // Check if we're in error state and recover if needed const currentStatus = this.getCurrentStatus() if (currentStatus.systemStatus === "Error") { @@ -244,6 +270,10 @@ export class CodeIndexManager { * Stops any in-progress indexing operation and the file watcher. 
*/ public stopIndexing(): void { + if (this._sembleProvider) { + this._sembleProvider.stopIndexing() + return + } if (this._orchestrator) { this._orchestrator.stopIndexing() } @@ -295,6 +325,7 @@ export class CodeIndexManager { this._serviceFactory = undefined this._orchestrator = undefined this._searchService = undefined + this._sembleProvider = undefined // Reset the flag after recovery is complete this._isRecoveringFromError = false @@ -306,6 +337,10 @@ export class CodeIndexManager { */ public dispose(): void { this.stopIndexing() + if (this._sembleProvider) { + this._sembleProvider.dispose() + this._sembleProvider = undefined + } this._stateManager.dispose() } @@ -317,6 +352,10 @@ export class CodeIndexManager { if (!this.isFeatureEnabled) { return } + if (this._sembleProvider) { + await this._sembleProvider.clearIndexData() + return + } this.assertInitialized() await this._orchestrator!.clearIndexData() await this._cacheManager!.clearCacheFile() @@ -338,6 +377,9 @@ export class CodeIndexManager { if (!this.isFeatureEnabled) { return [] } + if (this._sembleProvider) { + return this._sembleProvider.searchIndex(query, directoryPrefix) + } this.assertInitialized() return this._searchService!.searchIndex(query, directoryPrefix) } @@ -351,11 +393,28 @@ export class CodeIndexManager { if (this._orchestrator) { this.stopWatcher() } + // Dispose existing semble provider if switching away + if (this._sembleProvider) { + this._sembleProvider.dispose() + this._sembleProvider = undefined + } // Clear existing services to ensure clean state this._orchestrator = undefined this._searchService = undefined - // (Re)Initialize service factory + // Branch: if provider is "semble", create SembleProvider instead of external services + if (this._configManager!.currentEmbedderProvider === "semble") { + this._sembleProvider = new SembleProvider( + this.workspacePath, + this.context, + this._stateManager, + this._configManager!.currentSemblePath, + ) + await 
this._sembleProvider.initialize() + return + } + + // (Re)Initialize service factory for external providers this._serviceFactory = new CodeIndexServiceFactory( this._configManager!, this.workspacePath, diff --git a/src/services/code-index/semble/__tests__/provider.spec.ts b/src/services/code-index/semble/__tests__/provider.spec.ts new file mode 100644 index 0000000000..8039ac5d65 --- /dev/null +++ b/src/services/code-index/semble/__tests__/provider.spec.ts @@ -0,0 +1,330 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { SembleProvider } from "../provider" +import { SembleCLI } from "../semble-cli" +import { SEMBLE_DEFAULTS } from "../types" + +// Mock SembleCLI +vi.mock("../semble-cli", () => ({ + SembleCLI: vi.fn().mockImplementation(() => ({ + checkInstalled: vi.fn(), + search: vi.fn(), + findRelated: vi.fn(), + })), +})) + +// Mock TelemetryService +vi.mock("@roo-code/telemetry", () => ({ + TelemetryService: { + instance: { + captureEvent: vi.fn(), + }, + }, +})) + +// Mock vscode +vi.mock("vscode", () => ({ + ExtensionContext: vi.fn(), +})) + +import { TelemetryService } from "@roo-code/telemetry" +import { TelemetryEventName } from "@roo-code/types" + +describe("SembleProvider", () => { + let provider: SembleProvider + let mockCli: any + let mockStateManager: any + let mockContext: any + + beforeEach(() => { + vi.clearAllMocks() + + mockStateManager = { + setSystemState: vi.fn(), + } + + mockContext = {} + + provider = new SembleProvider("/workspace", mockContext, mockStateManager, "semble") + mockCli = (SembleCLI as any).mock.results[0].value + }) + + describe("constructor", () => { + it("should create provider with default options", () => { + const p = new SembleProvider("/workspace", mockContext, mockStateManager) + expect(p).toBeDefined() + expect(p.state).toBe("Standby") + }) + + it("should create provider with custom semble path", () => { + const p = new SembleProvider("/workspace", mockContext, mockStateManager, 
"/usr/local/bin/semble") + expect(p).toBeDefined() + }) + + it("should create provider with custom topK and content", () => { + const p = new SembleProvider("/workspace", mockContext, mockStateManager, "semble", { + topK: 5, + content: "all", + }) + expect(p).toBeDefined() + }) + }) + + describe("initialize", () => { + it("should set state to Indexed when semble is installed", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + + await provider.initialize() + + expect(provider.state).toBe("Indexed") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Indexed", + "Semble is ready. Searches index on-the-fly.", + ) + }) + + it("should set state to Error when semble is not installed", async () => { + mockCli.checkInstalled.mockResolvedValue({ + installed: false, + error: "semble: command not found", + }) + + await provider.initialize() + + expect(provider.state).toBe("Error") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Error", + expect.stringContaining("semble: command not found"), + ) + }) + + it("should not re-initialize if already initialized", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + + await provider.initialize() + await provider.initialize() + + expect(mockCli.checkInstalled).toHaveBeenCalledTimes(1) + }) + }) + + describe("startIndexing", () => { + it("should initialize if not already initialized", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + + await provider.startIndexing() + + expect(provider.state).toBe("Indexed") + }) + + it("should not change state if in Error state", async () => { + mockCli.checkInstalled.mockResolvedValue({ + installed: false, + error: "not found", + }) + + await provider.initialize() + await provider.startIndexing() + + expect(provider.state).toBe("Error") + }) + + it("should mark as Indexed when already initialized", async () => { + 
mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + + await provider.initialize() + await provider.startIndexing() + + expect(provider.state).toBe("Indexed") + }) + }) + + describe("stopIndexing", () => { + it("should be a no-op", () => { + provider.stopIndexing() + // No error thrown, no state change + expect(provider.state).toBe("Standby") + }) + }) + + describe("searchIndex", () => { + beforeEach(async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + await provider.initialize() + }) + + it("should return empty array when not initialized", async () => { + const uninitializedProvider = new SembleProvider("/workspace", mockContext, mockStateManager) + const results = await uninitializedProvider.searchIndex("test query") + expect(results).toEqual([]) + }) + + it("should search using CLI and convert results", async () => { + const mockResults = [ + { + chunk: { + content: "function authenticate() {}", + file_path: "src/auth.ts", + start_line: 10, + end_line: 25, + language: "typescript", + location: "src/auth.ts:10-25", + }, + score: 0.92, + }, + { + chunk: { + content: "export function login() {}", + file_path: "src/login.ts", + start_line: 5, + end_line: 15, + language: "typescript", + location: "src/login.ts:5-15", + }, + score: 0.78, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("authentication") + + expect(mockCli.search).toHaveBeenCalledWith("authentication", "/workspace", { + topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, + content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, + }) + + expect(results).toHaveLength(2) + expect(results[0]).toEqual({ + id: "semble-0", + score: 0.92, + payload: { + filePath: "/workspace/src/auth.ts", + codeChunk: "function authenticate() {}", + startLine: 10, + endLine: 25, + }, + }) + expect(results[1]).toEqual({ + id: "semble-1", + score: 0.78, + payload: { + filePath: "/workspace/src/login.ts", + codeChunk: "export 
function login() {}", + startLine: 5, + endLine: 15, + }, + }) + }) + + it("should filter out results with missing file_path", async () => { + const mockResults = [ + { + chunk: { + content: "good result", + file_path: "src/good.ts", + start_line: 1, + end_line: 10, + language: "typescript", + location: "src/good.ts:1-10", + }, + score: 0.8, + }, + { + chunk: { + content: "no file path result", + file_path: "", + start_line: 1, + end_line: 5, + language: "typescript", + location: "", + }, + score: 0.5, + }, + { + chunk: { + content: "null file path result", + file_path: null, + start_line: 1, + end_line: 5, + language: null, + location: "", + }, + score: 0.3, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + expect(results).toHaveLength(1) + expect(results[0].payload?.filePath).toBe("/workspace/src/good.ts") + }) + + it("should use directoryPrefix when provided", async () => { + mockCli.search.mockResolvedValue([]) + + await provider.searchIndex("test", "/custom/path") + + expect(mockCli.search).toHaveBeenCalledWith("test", "/custom/path", { + topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, + content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, + }) + }) + + it("should return empty array on search error and log telemetry", async () => { + mockCli.search.mockRejectedValue(new Error("Search failed")) + + const results = await provider.searchIndex("test") + + expect(results).toEqual([]) + expect(TelemetryService.instance.captureEvent).toHaveBeenCalledWith( + TelemetryEventName.CODE_INDEX_ERROR, + expect.objectContaining({ + location: "SembleProvider.searchIndex", + }), + ) + }) + + it("should return empty array when in Error state", async () => { + const errorProvider = new SembleProvider("/workspace", mockContext, mockStateManager) + const errorCli = (SembleCLI as any).mock.results[(SembleCLI as any).mock.results.length - 1].value + errorCli.checkInstalled.mockResolvedValue({ + installed: false, + error: "not found", + }) 
+ await errorProvider.initialize() + + const results = await errorProvider.searchIndex("test") + expect(results).toEqual([]) + }) + }) + + describe("clearIndexData", () => { + it("should reset state to Standby", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + await provider.initialize() + + await provider.clearIndexData() + + expect(provider.state).toBe("Standby") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Standby", + "Semble provider reset. On-disk cache remains until next rebuild.", + ) + }) + }) + + describe("dispose", () => { + it("should reset initialization state", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + await provider.initialize() + + provider.dispose() + + // After dispose, searchIndex should return empty array + const results = await provider.searchIndex("test") + expect(results).toEqual([]) + }) + }) +}) \ No newline at end of file diff --git a/src/services/code-index/semble/__tests__/semble-cli.spec.ts b/src/services/code-index/semble/__tests__/semble-cli.spec.ts new file mode 100644 index 0000000000..767a3e976f --- /dev/null +++ b/src/services/code-index/semble/__tests__/semble-cli.spec.ts @@ -0,0 +1,322 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { EventEmitter } from "events" +import { SembleCLI } from "../semble-cli" + +// Mock spawn +const mockSpawn = vi.fn() + +vi.mock("child_process", () => ({ + spawn: (...args: any[]) => mockSpawn(...args), +})) + +/** + * Helper to create a fake child process that emits stdout/stderr and closes. 
+ */ +function createMockProcess(stdout: string, stderr: string, exitCode: number) { + const proc = new EventEmitter() as any + proc.stdout = new EventEmitter() + proc.stderr = new EventEmitter() + + // Schedule data emission and close on next tick + setImmediate(() => { + if (stdout) proc.stdout.emit("data", Buffer.from(stdout)) + if (stderr) proc.stderr.emit("data", Buffer.from(stderr)) + proc.emit("close", exitCode) + }) + + return proc +} + +/** + * Helper to create a mock process that emits an error. + */ +function createErrorProcess(errorMessage: string) { + const proc = new EventEmitter() as any + proc.stdout = new EventEmitter() + proc.stderr = new EventEmitter() + + setImmediate(() => { + proc.emit("error", new Error(errorMessage)) + }) + + return proc +} + +describe("SembleCLI", () => { + let cli: SembleCLI + + beforeEach(() => { + vi.clearAllMocks() + cli = new SembleCLI("semble") + }) + + describe("constructor", () => { + it("should use default semble path", () => { + const defaultCli = new SembleCLI() + expect(defaultCli).toBeDefined() + }) + + it("should accept a custom path to the semble executable", () => { + const customCli = new SembleCLI("/usr/local/bin/semble") + expect(customCli).toBeDefined() + }) + }) + + describe("checkInstalled", () => { + it("should return installed: true with version when --help and pip show succeed", async () => { + // First call: semble --help; second call: pip show semble + mockSpawn + .mockReturnValueOnce(createMockProcess("usage: semble ...", "", 0)) + .mockReturnValueOnce(createMockProcess("Name: semble\nVersion: 0.3.1\nSummary: ...", "", 0)) + + const result = await cli.checkInstalled() + + expect(result).toEqual({ installed: true, version: "0.3.1" }) + expect(mockSpawn).toHaveBeenNthCalledWith(1, "semble", ["--help"], expect.objectContaining({ shell: false })) + expect(mockSpawn).toHaveBeenNthCalledWith(2, "pip", ["show", "semble"], expect.any(Object)) + }) + + it("should fall back to pip3 if pip is not 
available", async () => { + mockSpawn + .mockReturnValueOnce(createMockProcess("usage: semble ...", "", 0)) // --help + .mockReturnValueOnce(createErrorProcess("spawn ENOENT")) // pip fails + .mockReturnValueOnce(createMockProcess("Name: semble\nVersion: 0.3.2\n", "", 0)) // pip3 succeeds + + const result = await cli.checkInstalled() + + expect(result).toEqual({ installed: true, version: "0.3.2" }) + }) + + it("should return installed: true with unknown version when pip cannot find semble", async () => { + mockSpawn + .mockReturnValueOnce(createMockProcess("usage: semble ...", "", 0)) // --help + .mockReturnValueOnce(createMockProcess("", "WARNING: Package(s) not found: semble", 1)) // pip not found + .mockReturnValueOnce(createMockProcess("", "WARNING: Package(s) not found: semble", 1)) // pip3 not found + + const result = await cli.checkInstalled() + + expect(result).toEqual({ installed: true, version: "unknown" }) + }) + + it("should return installed: false when version is below 0.3.0", async () => { + mockSpawn + .mockReturnValueOnce(createMockProcess("usage: semble ...", "", 0)) // --help + .mockReturnValueOnce(createMockProcess("Name: semble\nVersion: 0.2.5\n", "", 0)) // pip show + + const result = await cli.checkInstalled() + + expect(result.installed).toBe(false) + expect(result.error).toContain("0.2.5") + expect(result.error).toContain(">= 0.3.0") + expect(result.error).toContain("pip install --upgrade semble") + }) + + it("should return installed: false when semble --help fails", async () => { + mockSpawn.mockReturnValueOnce(createMockProcess("", "semble: command not found", 127)) + + const result = await cli.checkInstalled() + + expect(result.installed).toBe(false) + expect(result.error).toContain("semble: command not found") + }) + + it("should return installed: false on spawn error", async () => { + mockSpawn.mockReturnValueOnce(createErrorProcess("spawn ENOENT")) + + const result = await cli.checkInstalled() + + expect(result.installed).toBe(false) + 
expect(result.error).toContain("spawn ENOENT") + }) + }) + + describe("search", () => { + it("should spawn with array args (no shell)", async () => { + const jsonResponse = JSON.stringify({ query: "auth", results: [] }) + mockSpawn.mockReturnValue(createMockProcess(jsonResponse, "", 0)) + + await cli.search("authentication", "/path/to/repo") + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["search", "authentication", "/path/to/repo", "-k", "10"], + expect.objectContaining({ shell: false }), + ) + }) + + it("should pass special characters safely in query (no shell interpretation)", async () => { + const jsonResponse = JSON.stringify({ query: "test", results: [] }) + mockSpawn.mockReturnValue(createMockProcess(jsonResponse, "", 0)) + + await cli.search('test $(rm -rf /) `whoami` "injection"', "/repo") + + // With spawn (no shell), these are just string args — not interpreted + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["search", 'test $(rm -rf /) `whoami` "injection"', "/repo", "-k", "10"], + expect.objectContaining({ shell: false }), + ) + }) + + it("should build correct args with custom topK", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "test", results: [] }), "", 0)) + + await cli.search("test", "/repo", { topK: 5 }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["search", "test", "/repo", "-k", "5"], + expect.any(Object), + ) + }) + + it("should add --content flag for non-default content types", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "test", results: [] }), "", 0)) + + await cli.search("test", "/repo", { content: "all" }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["search", "test", "/repo", "-k", "10", "--content", "all"], + expect.any(Object), + ) + }) + + it("should not add --content flag for code (default)", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "test", results: [] }), "", 0)) + + 
await cli.search("test", "/repo", { content: "code" }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["search", "test", "/repo", "-k", "10"], + expect.any(Object), + ) + }) + + it("should throw error when semble search fails", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "Error: something went wrong", 1)) + + await expect(cli.search("test", "/repo")).rejects.toThrow("Semble search failed") + }) + }) + + describe("findRelated", () => { + it("should build correct args with default options", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "related", results: [] }), "", 0)) + + await cli.findRelated("src/auth.ts", 42, "/repo") + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["find-related", "src/auth.ts", "42", "/repo", "-k", "10"], + expect.any(Object), + ) + }) + + it("should build correct args with custom topK and content", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "related", results: [] }), "", 0)) + + await cli.findRelated("src/auth.ts", 42, "/repo", { topK: 3, content: "all" }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["find-related", "src/auth.ts", "42", "/repo", "-k", "3", "--content", "all"], + expect.any(Object), + ) + }) + + it("should throw error when semble find-related fails", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "Error: no chunk found", 1)) + + await expect(cli.findRelated("src/auth.ts", 42, "/repo")).rejects.toThrow( + "Semble find-related failed", + ) + }) + }) + + describe("_parseOutput (via search)", () => { + it("should parse v0.3.0+ JSON format with nested chunk", async () => { + const jsonResponse = { + query: "authentication", + results: [ + { + chunk: { + content: "function authenticate() {}", + file_path: "src/auth.ts", + start_line: 10, + end_line: 25, + language: "typescript", + location: "src/auth.ts:10-25", + }, + score: 0.92, + }, + { + chunk: { + content: "export function 
login() {}", + file_path: "src/login.ts", + start_line: 5, + end_line: 15, + language: "typescript", + location: "src/login.ts:5-15", + }, + score: 0.78, + }, + ], + } + + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify(jsonResponse), "", 0)) + + const results = await cli.search("authentication", "/repo") + + expect(results).toHaveLength(2) + expect(results[0].chunk.file_path).toBe("src/auth.ts") + expect(results[0].chunk.start_line).toBe(10) + expect(results[0].chunk.end_line).toBe(25) + expect(results[0].chunk.content).toBe("function authenticate() {}") + expect(results[0].score).toBe(0.92) + expect(results[1].chunk.file_path).toBe("src/login.ts") + expect(results[1].score).toBe(0.78) + }) + + it("should handle empty results response", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "nonexistent", results: [] }), "", 0)) + + const results = await cli.search("nonexistent", "/repo") + + expect(results).toEqual([]) + }) + + it("should handle error response from semble", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ error: "No results found." 
}), "", 0)) + + const results = await cli.search("nonexistent", "/repo") + + expect(results).toEqual([]) + }) + + it("should handle empty stdout", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "", 0)) + + const results = await cli.search("test", "/repo") + + expect(results).toEqual([]) + }) + + it("should handle whitespace-only stdout", async () => { + mockSpawn.mockReturnValue(createMockProcess(" \n \n ", "", 0)) + + const results = await cli.search("test", "/repo") + + expect(results).toEqual([]) + }) + + it("should handle non-JSON output gracefully", async () => { + mockSpawn.mockReturnValue(createMockProcess("Some plain text output that is not JSON", "", 0)) + + const results = await cli.search("test", "/repo") + + expect(results).toEqual([]) + }) + }) +}) diff --git a/src/services/code-index/semble/index.ts b/src/services/code-index/semble/index.ts new file mode 100644 index 0000000000..4eafe50e69 --- /dev/null +++ b/src/services/code-index/semble/index.ts @@ -0,0 +1,4 @@ +export { SembleCLI } from "./semble-cli" +export { SembleProvider } from "./provider" +export type { ISembleProvider, SembleSearchResult, SembleChunk, SembleCheckResult, SembleConfig, SembleContentType } from "./types" +export { SEMBLE_DEFAULTS } from "./types" diff --git a/src/services/code-index/semble/provider.ts b/src/services/code-index/semble/provider.ts new file mode 100644 index 0000000000..0c41ae219a --- /dev/null +++ b/src/services/code-index/semble/provider.ts @@ -0,0 +1,203 @@ +import * as path from "path" +import * as vscode from "vscode" + +import { IndexingState } from "../interfaces/manager" +import { VectorStoreSearchResult } from "../interfaces/vector-store" +import { CodeIndexStateManager } from "../state-manager" +import { SembleCLI } from "./semble-cli" +import { ISembleProvider, SembleConfig, SembleContentType, SembleSearchResult, SEMBLE_DEFAULTS } from "./types" +import { TelemetryService } from "@roo-code/telemetry" +import { TelemetryEventName } 
from "@roo-code/types" + +/** + * Orchestrates code search via the semble CLI. + * + * Semble indexes on-the-fly with each search call — there is no separate + * "indexing" step. The provider simply validates that semble is installed, + * then delegates search queries to `semble search`. + * + * When `embedderProvider === "semble"`, the CodeIndexManager delegates + * to this provider instead of the ServiceFactory → orchestrator pipeline. + */ +export class SembleProvider implements ISembleProvider { + private readonly cli: SembleCLI + private readonly workspacePath: string + private readonly config: SembleConfig + private readonly stateManager: CodeIndexStateManager + + private _state: IndexingState = "Standby" + private _isInitialized = false + + constructor( + workspacePath: string, + _context: vscode.ExtensionContext, + stateManager: CodeIndexStateManager, + semblePath: string = SEMBLE_DEFAULTS.DEFAULT_PATH, + options?: { topK?: number; content?: SembleContentType }, + ) { + this.workspacePath = workspacePath + this.stateManager = stateManager + + this.config = { + semblePath, + topK: options?.topK ?? SEMBLE_DEFAULTS.DEFAULT_TOP_K, + content: options?.content ?? SEMBLE_DEFAULTS.DEFAULT_CONTENT, + } + + this.cli = new SembleCLI(semblePath) + } + + get state(): IndexingState { + return this._state + } + + /** + * Initializes the provider: checks semble is installed. + */ + async initialize(): Promise { + if (this._isInitialized) { + return + } + + // Check if semble is installed + const checkResult = await this.cli.checkInstalled() + + if (!checkResult.installed) { + const errorMsg = checkResult.error || "Semble is not installed" + this._state = "Error" + this.stateManager.setSystemState( + "Error", + `Semble not found. Install with 'pip install semble' or set the semble path in settings. 
Error: ${errorMsg}`, + ) + console.error("[SembleProvider] Semble not found:", errorMsg) + return + } + + console.log("[SembleProvider] Semble found and ready.") + + // Semble indexes on-the-fly, so we mark as "Indexed" (ready for search) + this._state = "Indexed" + this.stateManager.setSystemState("Indexed", "Semble is ready. Searches index on-the-fly.") + + this._isInitialized = true + } + + /** + * Starts indexing. Since semble indexes on-the-fly with each search, + * this just validates the installation and marks as ready. + */ + async startIndexing(): Promise { + if (!this._isInitialized) { + await this.initialize() + } + + if (this._state === "Error") { + return + } + + // Semble indexes on-the-fly — no separate indexing step needed. + // Mark as indexed/ready. + this._state = "Indexed" + this.stateManager.setSystemState("Indexed", "Semble is ready. Searches index on-the-fly.") + } + + /** + * Stops indexing (no-op — semble has no background indexing process). + */ + stopIndexing(): void { + // No-op: semble indexes on-the-fly per search call + } + + /** + * Searches the codebase using `semble search`. + */ + async searchIndex(query: string, directoryPrefix?: string): Promise { + if (!this._isInitialized) { + console.warn("[SembleProvider] searchIndex called before initialization") + return [] + } + + if (this._state === "Error") { + return [] + } + + try { + let searchPath = this.workspacePath + if (directoryPrefix) { + // Resolve relative paths against the workspace root + searchPath = path.isAbsolute(directoryPrefix) + ? directoryPrefix + : path.join(this.workspacePath, directoryPrefix) + } + console.log(`[SembleProvider] Searching for "${query}" in ${searchPath}`) + const results = await this.cli.search(query, searchPath, { + topK: this.config.topK, + content: this.config.content, + }) + + // Semble returns file paths relative to the search path. + // We join against searchPath (not workspacePath) to get correct absolute paths. 
+ const converted = this._convertResults(results, searchPath) + console.log( + `[SembleProvider] Search returned ${converted.length} results (raw: ${results.length}). Sample path: ${converted[0]?.payload?.filePath ?? "none"}`, + ) + return converted + } catch (error: any) { + const errorMessage = error?.message || String(error) + console.error("[SembleProvider] Search failed:", errorMessage) + + TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, { + error: errorMessage, + stack: error instanceof Error ? error.stack : undefined, + location: "SembleProvider.searchIndex", + }) + + return [] + } + } + + /** + * Clears index data. Semble manages its own cache at ~/Library/Caches/semble/ + * (or equivalent per-platform). This resets the provider state but does not + * delete semble's on-disk cache — use `semble clear-cache` for that. + */ + async clearIndexData(): Promise { + this._state = "Standby" + this.stateManager.setSystemState("Standby", "Semble provider reset. On-disk cache remains until next rebuild.") + } + + /** + * Disposes resources. + */ + dispose(): void { + this._isInitialized = false + } + + // --- Private Helpers --- + + /** + * Converts Semble CLI results to Zoo's VectorStoreSearchResult format. + * + * Semble v0.3.0+ returns results in the format: + * { chunk: { content, file_path, start_line, end_line, language, location }, score } + * + * Note: semble returns file paths relative to the path it was invoked with. + * We join against `basePath` (the actual path passed to semble) to produce + * correct absolute paths for the rest of the pipeline. + * Results with missing file paths are excluded. 
+ */ + private _convertResults(results: SembleSearchResult[], basePath: string): VectorStoreSearchResult[] { + return results + .filter((r) => r.chunk?.file_path) // Exclude results with no file path + .map((r, index) => ({ + id: `semble-${index}`, + score: r.score, + payload: { + filePath: path.join(basePath, r.chunk.file_path), + codeChunk: r.chunk?.content ?? "", + startLine: r.chunk?.start_line ?? 0, + endLine: r.chunk?.end_line ?? 0, + }, + })) + } +} diff --git a/src/services/code-index/semble/semble-cli.ts b/src/services/code-index/semble/semble-cli.ts new file mode 100644 index 0000000000..2e9e85af65 --- /dev/null +++ b/src/services/code-index/semble/semble-cli.ts @@ -0,0 +1,257 @@ +import { spawn } from "child_process" + +import { SembleSearchResult, SembleCheckResult, SembleContentType, SEMBLE_DEFAULTS } from "./types" + +/** + * Wraps the `semble` CLI for programmatic access. + * + * Semble must be installed via pip: `pip install semble` + * The semblePath should be a direct path to the executable (e.g. "semble" or "/usr/local/bin/semble"). + * + * All methods spawn the semble process via child_process.spawn with array + * arguments (no shell) to prevent shell injection. + * + * Semble CLI (v0.3.0+) subcommands: + * search [path] — search a codebase + * find-related [path] — find similar code + * init — write sub-agent file + * savings — show token stats + * + * Common flags: + * -k, --top-k N — number of results (default: 5) + * --content TYPE [TYPE ...] — content types: code, docs, config, all + */ +export class SembleCLI { + private readonly semblePath: string + + constructor(semblePath: string = SEMBLE_DEFAULTS.DEFAULT_PATH) { + this.semblePath = semblePath + } + + /** + * Checks whether semble is installed and meets the minimum version requirement (0.3.0). + * + * - Confirms the executable runs via `semble --help`. + * - Queries `pip show semble` (falling back to `pip3`) to get the installed version + * and validates >= 0.3.0. 
+ */ + async checkInstalled(): Promise { + // 1. Confirm the executable is runnable + try { + await this._spawn(["--help"], { timeout: 10_000 }) + } catch (error: any) { + return { + installed: false, + error: error?.stderr?.trim() || error?.message || "Failed to run semble", + } + } + + // 2. Query pip for the installed semble version + const version = await this._getPipVersion() + if (!version) { + // pip couldn't find it — semble may be installed outside pip, allow it + return { installed: true, version: "unknown" } + } + + // 3. Validate >= 0.3.0 + const match = version.match(/^(\d+)\.(\d+)\.(\d+)/) + if (!match) { + return { installed: true, version: "unknown" } + } + + const [major, minor] = [Number(match[1]), Number(match[2])] + if (major === 0 && minor < 3) { + return { + installed: false, + error: `Semble version ${version} is not supported. Please upgrade to semble >= 0.3.0 (run: pip install --upgrade semble).`, + } + } + + return { installed: true, version } + } + + /** + * Searches a codebase. Semble indexes on-the-fly during search. + * + * Usage: semble search [path] [-k N] [--content TYPE [TYPE ...]] + */ + async search( + query: string, + repoPath: string, + options?: { topK?: number; content?: SembleContentType }, + ): Promise { + const topK = options?.topK ?? SEMBLE_DEFAULTS.DEFAULT_TOP_K + const args = ["search", query, repoPath, "-k", String(topK)] + if (options?.content && options.content !== "code") { + args.push("--content", options.content) + } + + try { + const { stdout } = await this._spawn(args, { timeout: 120_000 }) + return this._parseOutput(stdout) + } catch (error: any) { + const stderr = error?.stderr?.trim() || "" + const message = error?.message || String(error) + throw new Error(`Semble search failed: ${stderr || message}`) + } + } + + /** + * Finds code similar to a known location. 
+ * + * Usage: semble find-related [path] [-k N] [--content TYPE [TYPE ...]] + */ + async findRelated( + filePath: string, + line: number, + repoPath: string, + options?: { topK?: number; content?: SembleContentType }, + ): Promise { + const topK = options?.topK ?? SEMBLE_DEFAULTS.DEFAULT_TOP_K + const args = ["find-related", filePath, String(line), repoPath, "-k", String(topK)] + if (options?.content && options.content !== "code") { + args.push("--content", options.content) + } + + try { + const { stdout } = await this._spawn(args, { timeout: 120_000 }) + return this._parseOutput(stdout) + } catch (error: any) { + const stderr = error?.stderr?.trim() || "" + const message = error?.message || String(error) + throw new Error(`Semble find-related failed: ${stderr || message}`) + } + } + + /** + * Queries `pip show semble` (falling back to `pip3`) and returns the version string, + * or `undefined` if semble is not found in pip or pip is unavailable. + */ + private async _getPipVersion(): Promise { + for (const pipCmd of ["pip", "pip3"]) { + try { + const stdout = await this._spawnExternal(pipCmd, ["show", "semble"], { timeout: 10_000 }) + // pip show outputs lines like "Version: 0.3.1" + const match = stdout.match(/^Version:\s*(.+)$/m) + if (match) { + return match[1].trim() + } + } catch { + // try next + } + } + return undefined + } + + /** + * Spawns the semble process and collects stdout/stderr. + * Uses spawn without shell — args are passed as an array, no injection risk. 
+ */ + private _spawn(args: string[], options: { timeout: number }): Promise<{ stdout: string; stderr: string }> { + return new Promise((resolve, reject) => { + const child = spawn(this.semblePath, args, { + shell: false, + timeout: options.timeout, + maxBuffer: 10 * 1024 * 1024, + stdio: ["ignore", "pipe", "pipe"], + } as any) + + let stdout = "" + let stderr = "" + + child.stdout?.on("data", (data: Buffer) => { + stdout += data.toString() + }) + + child.stderr?.on("data", (data: Buffer) => { + stderr += data.toString() + }) + + child.on("error", (err: Error) => { + reject({ message: err.message, stderr }) + }) + + child.on("close", (code: number | null) => { + if (code === 0) { + resolve({ stdout, stderr }) + } else { + reject({ message: `Process exited with code ${code}`, stderr, stdout }) + } + }) + }) + } + + /** + * Spawns an arbitrary external command (not the semble executable) and returns stdout. + */ + private _spawnExternal(cmd: string, args: string[], options: { timeout: number }): Promise { + return new Promise((resolve, reject) => { + const child = spawn(cmd, args, { + shell: false, + timeout: options.timeout, + stdio: ["ignore", "pipe", "pipe"], + } as any) + + let stdout = "" + let stderr = "" + + child.stdout?.on("data", (data: Buffer) => { + stdout += data.toString() + }) + child.stderr?.on("data", (data: Buffer) => { + stderr += data.toString() + }) + child.on("error", (err: Error) => { + reject({ message: err.message, stderr }) + }) + child.on("close", (code: number | null) => { + if (code === 0) { + resolve(stdout) + } else { + reject({ message: `Process exited with code ${code}`, stderr }) + } + }) + }) + } + + /** + * Parses semble CLI JSON output into structured results. + * + * Semble v0.3.0+ outputs JSON by default with format: + * { "query": "...", "results": [{ "chunk": { "content": "...", "file_path": "...", "start_line": N, "end_line": M, "language": "...", "location": "..." 
}, "score": X }] } + * + * If the query returns no results, semble outputs: + * { "error": "No results found." } + */ + private _parseOutput(stdout: string): SembleSearchResult[] { + const trimmed = stdout.trim() + if (!trimmed) { + return [] + } + + try { + const parsed = JSON.parse(trimmed) + + // Handle error response: {"error": "No results found."} + if (parsed.error) { + return [] + } + + // Handle successful response: {query, results: [{chunk, score}]} + if (parsed.results && Array.isArray(parsed.results)) { + return parsed.results as SembleSearchResult[] + } + + // Fallback: if it's a flat array (older format) + if (Array.isArray(parsed)) { + return parsed as SembleSearchResult[] + } + + return [] + } catch { + // Not JSON — this shouldn't happen with v0.3.0+ but handle gracefully + console.warn("[SembleCLI] Unexpected non-JSON output from semble") + return [] + } + } +} diff --git a/src/services/code-index/semble/types.ts b/src/services/code-index/semble/types.ts new file mode 100644 index 0000000000..3657e4e60d --- /dev/null +++ b/src/services/code-index/semble/types.ts @@ -0,0 +1,90 @@ +import { IndexingState } from "../interfaces/manager" +import { VectorStoreSearchResult } from "../interfaces/vector-store" + +/** + * Content types supported by semble for indexing. + * Maps to the `--content` CLI flag. + */ +export type SembleContentType = "code" | "docs" | "config" | "all" + +/** + * A single chunk returned by semble search results. + * Matches the `chunk` field in semble's JSON output format. + */ +export interface SembleChunk { + content: string + file_path: string + start_line: number + end_line: number + language: string | null + location: string +} + +/** + * Result from a semble CLI search invocation. + * Matches the JSON output format: `{ query, results: [{ chunk, score }] }`. + */ +export interface SembleSearchResult { + chunk: SembleChunk + score: number +} + +/** + * Result from checking if semble is installed. 
+ */ +export interface SembleCheckResult { + installed: boolean + version?: string + error?: string +} + +/** + * Configuration for the Semble provider. + */ +export interface SembleConfig { + /** Path to the semble executable. Default: "semble" (uses PATH). */ + semblePath: string + /** Maximum search results to return. Default: 10. */ + topK: number + /** Content types to index. Default: "code". */ + content: SembleContentType +} + +/** + * Interface for the SembleProvider that wraps the semble CLI. + * + * Note: `findRelated` is available on SembleCLI but not yet exposed through + * this provider or CodeIndexManager. It's reserved for future use — e.g., a + * "find similar code" tool or context menu action. + */ +export interface ISembleProvider { + /** Initializes the provider — checks semble is installed. */ + initialize(): Promise + + /** Marks the provider as ready (semble indexes on-the-fly). */ + startIndexing(): Promise + + /** Stops indexing (no-op — semble has no background process). */ + stopIndexing(): void + + /** Searches the codebase for relevant code. */ + searchIndex(query: string, directoryPrefix?: string): Promise + + /** Clears index data (no-op in current version). */ + clearIndexData(): Promise + + /** Disposes resources. */ + dispose(): void + + /** Current state. */ + readonly state: IndexingState +} + +/** + * Default configuration values for Semble. + */ +export const SEMBLE_DEFAULTS = { + DEFAULT_PATH: "semble", + DEFAULT_TOP_K: 10, + DEFAULT_CONTENT: "code" as SembleContentType, +} diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts index d23eff4810..335812bd41 100644 --- a/src/services/code-index/service-factory.ts +++ b/src/services/code-index/service-factory.ts @@ -45,6 +45,12 @@ export class CodeIndexServiceFactory { const provider = config.embedderProvider as EmbedderProvider + if (provider === "semble") { + throw new Error( + "Semble provider handles its own embedding. 
Do not call createEmbedder() for semble — use SembleProvider instead.", + ) + } + if (provider === "openai") { const apiKey = config.openAiOptions?.openAiNativeApiKey @@ -141,6 +147,13 @@ export class CodeIndexServiceFactory { const config = this.configManager.getConfig() const provider = config.embedderProvider as EmbedderProvider + + if (provider === "semble") { + throw new Error( + "Semble provider handles its own vector storage. Do not call createVectorStore() for semble — use SembleProvider instead.", + ) + } + const defaultModel = getDefaultModelId(provider) // Use the embedding model ID from config, not the chat model IDs const modelId = config.modelId ?? defaultModel diff --git a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts index 7f5c9fac2b..a89e2c9488 100644 --- a/src/shared/embeddingModels.ts +++ b/src/shared/embeddingModels.ts @@ -87,6 +87,9 @@ export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = { "qwen/qwen3-embedding-4b": { dimension: 2560, scoreThreshold: 0.4 }, "qwen/qwen3-embedding-8b": { dimension: 4096, scoreThreshold: 0.4 }, }, + semble: { + "potion-code-16M": { dimension: 256, scoreThreshold: 0.4 }, + }, } /** @@ -185,6 +188,9 @@ export function getDefaultModelId(provider: EmbedderProvider): string { case "openrouter": return "openai/text-embedding-3-large" + case "semble": + return "potion-code-16M" + default: // Fallback for unknown providers console.warn(`Unknown provider for default model ID: ${provider}. 
Falling back to OpenAI default.`) diff --git a/webview-ui/src/components/chat/CodeIndexPopover.tsx b/webview-ui/src/components/chat/CodeIndexPopover.tsx index 763c243ec1..dc9d5e7d39 100644 --- a/webview-ui/src/components/chat/CodeIndexPopover.tsx +++ b/webview-ui/src/components/chat/CodeIndexPopover.tsx @@ -81,6 +81,7 @@ interface LocalCodeIndexSettings { codebaseIndexVercelAiGatewayApiKey?: string codebaseIndexOpenRouterApiKey?: string codebaseIndexOpenRouterSpecificProvider?: string + codebaseIndexSemblePath?: string } // Validation schema for codebase index settings @@ -176,6 +177,13 @@ const createValidationSchema = (provider: EmbedderProvider, t: any) => { .min(1, t("settings:codeIndex.validation.modelSelectionRequired")), }) + case "semble": + // Semble requires no API keys, Qdrant URL, or model selection + return z.object({ + codebaseIndexEnabled: z.boolean(), + codebaseIndexSemblePath: z.string().optional(), + }) + default: return baseSchema } @@ -225,6 +233,7 @@ export const CodeIndexPopover: React.FC = ({ codebaseIndexVercelAiGatewayApiKey: "", codebaseIndexOpenRouterApiKey: "", codebaseIndexOpenRouterSpecificProvider: "", + codebaseIndexSemblePath: "", }) // Initial settings state - stores the settings when popover opens @@ -265,6 +274,7 @@ export const CodeIndexPopover: React.FC = ({ codebaseIndexOpenRouterApiKey: "", codebaseIndexOpenRouterSpecificProvider: codebaseIndexConfig.codebaseIndexOpenRouterSpecificProvider || "", + codebaseIndexSemblePath: codebaseIndexConfig.codebaseIndexSemblePath || "", } setInitialSettings(settings) setCurrentSettings(settings) @@ -761,6 +771,9 @@ export const CodeIndexPopover: React.FC = ({ {t("settings:codeIndex.openRouterProvider")} + + {t("settings:codeIndex.sembleProvider")} + @@ -1430,54 +1443,98 @@ export const CodeIndexPopover: React.FC = ({ )} - {/* Qdrant Settings */} -
- - - updateSetting("codebaseIndexQdrantUrl", e.target.value) - } - onBlur={(e: any) => { - // Set default Qdrant URL if field is empty - if (!e.target.value.trim()) { - currentSettings.codebaseIndexQdrantUrl = DEFAULT_QDRANT_URL - updateSetting("codebaseIndexQdrantUrl", DEFAULT_QDRANT_URL) - } - }} - placeholder={t("settings:codeIndex.qdrantUrlPlaceholder")} - className={cn("w-full", { - "border-red-500": formErrors.codebaseIndexQdrantUrl, - })} - /> - {formErrors.codebaseIndexQdrantUrl && ( -

- {formErrors.codebaseIndexQdrantUrl} + {currentSettings.codebaseIndexEmbedderProvider === "semble" && ( + <> +

+

+ {t("settings:codeIndex.sembleInstallTitle")} +

+

+ {t("settings:codeIndex.sembleInstallDescription")} +

+ + {t("settings:codeIndex.sembleInstallCommand")} + +

+ {t("settings:codeIndex.sembleInstallNote")} +

+
+
+ + + updateSetting("codebaseIndexSemblePath", e.target.value) + } + placeholder={t("settings:codeIndex.semblePathPlaceholder")} + className="w-full" + /> +

+ {t("settings:codeIndex.semblePathDescription")} +

+
+

+ {t("settings:codeIndex.sembleDescription")}

- )} -
+ + )} -
- - updateSetting("codeIndexQdrantApiKey", e.target.value)} - placeholder={t("settings:codeIndex.qdrantApiKeyPlaceholder")} - className={cn("w-full", { - "border-red-500": formErrors.codeIndexQdrantApiKey, - })} - /> - {formErrors.codeIndexQdrantApiKey && ( -

- {formErrors.codeIndexQdrantApiKey} -

- )} -
+ {/* Qdrant Settings — hidden for semble */} + {currentSettings.codebaseIndexEmbedderProvider !== "semble" && ( + <> +
+ + + updateSetting("codebaseIndexQdrantUrl", e.target.value) + } + onBlur={(e: any) => { + // Set default Qdrant URL if field is empty + if (!e.target.value.trim()) { + currentSettings.codebaseIndexQdrantUrl = DEFAULT_QDRANT_URL + updateSetting("codebaseIndexQdrantUrl", DEFAULT_QDRANT_URL) + } + }} + placeholder={t("settings:codeIndex.qdrantUrlPlaceholder")} + className={cn("w-full", { + "border-red-500": formErrors.codebaseIndexQdrantUrl, + })} + /> + {formErrors.codebaseIndexQdrantUrl && ( +

+ {formErrors.codebaseIndexQdrantUrl} +

+ )} +
+ +
+ + + updateSetting("codeIndexQdrantApiKey", e.target.value) + } + placeholder={t("settings:codeIndex.qdrantApiKeyPlaceholder")} + className={cn("w-full", { + "border-red-500": formErrors.codeIndexQdrantApiKey, + })} + /> + {formErrors.codeIndexQdrantApiKey && ( +

+ {formErrors.codeIndexQdrantApiKey} +

+ )} +
+ + )} )} diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index a3c11be386..cb786410eb 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -198,6 +198,15 @@ "openRouterApiKeyPlaceholder": "Enter your OpenRouter API key", "openRouterProviderRoutingLabel": "OpenRouter Provider Routing", "openRouterProviderRoutingDescription": "OpenRouter routes requests to the best available providers for your embedding model. By default, requests are load balanced across the top providers to maximize uptime. However, you can choose a specific provider to use for this model.", + "sembleProvider": "Semble - Local", + "semblePathLabel": "Semble Path", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Path to the semble executable. Leave as \"semble\" if it's on your PATH, or provide the full path (e.g. \"/usr/local/bin/semble\").", + "sembleInstallTitle": "Install semble", + "sembleInstallDescription": "Semble must be installed via pip before use:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Requires Python 3.10+. 
After installing, semble will be available on your PATH.", + "sembleDescription": "Local hybrid search — no API keys required.", "openaiCompatibleProvider": "OpenAI Compatible", "openAiKeyLabel": "OpenAI API Key", "openAiKeyPlaceholder": "Enter your OpenAI API key", From 54317b8a331996400300b24a52593610fe35e6e6 Mon Sep 17 00:00:00 2001 From: Naved Date: Sat, 30 May 2026 07:33:17 -0700 Subject: [PATCH 02/11] fix test and translations --- src/services/code-index/semble/provider.ts | 2 +- webview-ui/src/i18n/locales/ca/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/de/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/en/settings.json | 2 +- webview-ui/src/i18n/locales/es/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/fr/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/hi/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/id/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/it/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/ja/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/ko/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/nl/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/pl/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/pt-BR/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/ru/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/tr/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/vi/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/zh-CN/settings.json | 11 ++++++++++- webview-ui/src/i18n/locales/zh-TW/settings.json | 11 ++++++++++- 19 files changed, 172 insertions(+), 19 deletions(-) diff --git a/src/services/code-index/semble/provider.ts b/src/services/code-index/semble/provider.ts index 0c41ae219a..59c3b819a1 100644 --- a/src/services/code-index/semble/provider.ts +++ b/src/services/code-index/semble/provider.ts @@ -193,7 +193,7 @@ export class SembleProvider implements ISembleProvider { id: `semble-${index}`, 
score: r.score, payload: { - filePath: path.join(basePath, r.chunk.file_path), + filePath: path.join(basePath, r.chunk.file_path).replace(/\\/g, "/"), codeChunk: r.chunk?.content ?? "", startLine: r.chunk?.start_line ?? 0, endLine: r.chunk?.end_line ?? 0, diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index 92ecfa7e73..3840db1c22 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Aturant...", "workspaceToggleLabel": "Activar la indexació per a aquest espai de treball", "workspaceDisabledMessage": "La indexació està configurada però no habilitada per a aquest espai de treball.", - "autoEnableDefaultLabel": "Habilitar automàticament la indexació per a nous espais de treball" + "autoEnableDefaultLabel": "Habilitar automàticament la indexació per a nous espais de treball", + "sembleProvider": "Semble - Local", + "semblePathLabel": "Ruta de Semble", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Ruta a l'executable de semble. Deixeu-ho com a \"semble\" si està al vostre PATH, o proporcioneu la ruta completa (p. ex. \"/usr/local/bin/semble\").", + "sembleInstallTitle": "Instal·lar semble", + "sembleInstallDescription": "Semble s'ha d'instal·lar via pip abans d'usar-lo:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Requereix Python 3.10+. Assegureu-vos que el binari semble instal·lat estigui al vostre PATH, o especifiqueu la ruta completa a dalt.", + "sembleDescription": "Cerca híbrida local — no calen claus d'API." 
}, "autoApprove": { "toggleShortcut": "Pots configurar una drecera global per a aquesta configuració a les preferències del teu IDE.", diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index 5154d11039..cce2f51697 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Wird gestoppt...", "workspaceToggleLabel": "Indexierung für diesen Arbeitsbereich aktivieren", "workspaceDisabledMessage": "Indexierung ist konfiguriert, aber nicht für diesen Arbeitsbereich aktiviert.", - "autoEnableDefaultLabel": "Indexierung für neue Arbeitsbereiche automatisch aktivieren" + "autoEnableDefaultLabel": "Indexierung für neue Arbeitsbereiche automatisch aktivieren", + "sembleProvider": "Semble - Lokal", + "semblePathLabel": "Semble-Pfad", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Pfad zur semble-Ausführungsdatei. Belasse es bei \"semble\", wenn es in deinem PATH ist, oder gib den vollständigen Pfad an (z.B. \"/usr/local/bin/semble\").", + "sembleInstallTitle": "Semble installieren", + "sembleInstallDescription": "Semble muss vor der Verwendung über pip installiert werden:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Erfordert Python 3.10+. Stelle sicher, dass die installierte semble-Binärdatei in deinem PATH ist, oder gib oben den vollständigen Pfad an.", + "sembleDescription": "Lokale hybride Suche — keine API-Schlüssel erforderlich." 
}, "autoApprove": { "toggleShortcut": "Du kannst in deinen IDE-Einstellungen einen globalen Shortcut für diese Einstellung konfigurieren.", diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index cb786410eb..e3c9f089b1 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -205,7 +205,7 @@ "sembleInstallTitle": "Install semble", "sembleInstallDescription": "Semble must be installed via pip before use:", "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Requires Python 3.10+. After installing, semble will be available on your PATH.", + "sembleInstallNote": "Requires Python 3.10+. Ensure the installed semble binary is on your PATH, or specify the full path above.", "sembleDescription": "Local hybrid search — no API keys required.", "openaiCompatibleProvider": "OpenAI Compatible", "openAiKeyLabel": "OpenAI API Key", diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index 8ec3e29338..540dd4ee08 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Deteniendo...", "workspaceToggleLabel": "Activar indexación para este espacio de trabajo", "workspaceDisabledMessage": "La indexación está configurada pero no habilitada para este espacio de trabajo.", - "autoEnableDefaultLabel": "Habilitar automáticamente la indexación para nuevos espacios de trabajo" + "autoEnableDefaultLabel": "Habilitar automáticamente la indexación para nuevos espacios de trabajo", + "sembleProvider": "Semble - Local", + "semblePathLabel": "Ruta de Semble", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Ruta al ejecutable de semble. Déjelo como \"semble\" si está en su PATH, o proporcione la ruta completa (ej. 
\"/usr/local/bin/semble\").", + "sembleInstallTitle": "Instalar semble", + "sembleInstallDescription": "Semble debe instalarse vía pip antes de usarlo:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Requiere Python 3.10+. Asegúrese de que el binario semble instalado esté en su PATH, o especifique la ruta completa arriba.", + "sembleDescription": "Búsqueda híbrida local — no se requieren claves de API." }, "autoApprove": { "toggleShortcut": "Puedes configurar un atajo global para esta configuración en las preferencias de tu IDE.", diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index d746111a0a..33d656baeb 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Arrêt en cours...", "workspaceToggleLabel": "Activer l'indexation pour cet espace de travail", "workspaceDisabledMessage": "L'indexation est configurée mais non activée pour cet espace de travail.", - "autoEnableDefaultLabel": "Activer automatiquement l'indexation pour les nouveaux espaces de travail" + "autoEnableDefaultLabel": "Activer automatiquement l'indexation pour les nouveaux espaces de travail", + "sembleProvider": "Semble - Local", + "semblePathLabel": "Chemin de Semble", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Chemin vers l'exécutable semble. Laissez \"semble\" s'il est dans votre PATH, ou fournissez le chemin complet (ex. \"/usr/local/bin/semble\").", + "sembleInstallTitle": "Installer semble", + "sembleInstallDescription": "Semble doit être installé via pip avant utilisation :", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Nécessite Python 3.10+. Assurez-vous que le binaire semble installé est dans votre PATH, ou spécifiez le chemin complet ci-dessus.", + "sembleDescription": "Recherche hybride locale — aucune clé API requise." 
}, "autoApprove": { "toggleShortcut": "Vous pouvez configurer un raccourci global pour ce paramètre dans les préférences de votre IDE.", diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index 9a77b69ee4..3b3f9d8254 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "रोक रहा है...", "workspaceToggleLabel": "इस वर्कस्पेस के लिए इंडेक्सिंग सक्षम करें", "workspaceDisabledMessage": "इंडेक्सिंग कॉन्फ़िगर की गई है लेकिन इस वर्कस्पेस के लिए सक्षम नहीं है।", - "autoEnableDefaultLabel": "नए वर्कस्पेस के लिए स्वचालित रूप से इंडेक्सिंग सक्षम करें" + "autoEnableDefaultLabel": "नए वर्कस्पेस के लिए स्वचालित रूप से इंडेक्सिंग सक्षम करें", + "sembleProvider": "Semble - स्थानीय", + "semblePathLabel": "Semble पथ", + "semblePathPlaceholder": "semble", + "semblePathDescription": "semble निष्पादन योग्य का पथ। यदि यह आपके PATH में है तो \"semble\" रहने दें, या पूरा पथ प्रदान करें (जैसे \"/usr/local/bin/semble\")।", + "sembleInstallTitle": "semble इंस्टॉल करें", + "sembleInstallDescription": "उपयोग से पहले Semble को pip के माध्यम से इंस्टॉल किया जाना चाहिए:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Python 3.10+ आवश्यक है। सुनिश्चित करें कि इंस्टॉल किया गया semble बाइनरी आपके PATH पर है, या ऊपर पूरा पथ निर्दिष्ट करें।", + "sembleDescription": "स्थानीय हाइब्रिड खोज — किसी API कुंजी की आवश्यकता नहीं।" }, "autoApprove": { "toggleShortcut": "आप अपनी आईडीई वरीयताओं में इस सेटिंग के लिए एक वैश्विक शॉर्टकट कॉन्फ़िगर कर सकते हैं।", diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json index 41eae1b053..f57155283b 100644 --- a/webview-ui/src/i18n/locales/id/settings.json +++ b/webview-ui/src/i18n/locales/id/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Menghentikan...", "workspaceToggleLabel": "Aktifkan pengindeksan untuk ruang kerja ini", 
"workspaceDisabledMessage": "Pengindeksan dikonfigurasi tetapi tidak diaktifkan untuk ruang kerja ini.", - "autoEnableDefaultLabel": "Aktifkan pengindeksan secara otomatis untuk ruang kerja baru" + "autoEnableDefaultLabel": "Aktifkan pengindeksan secara otomatis untuk ruang kerja baru", + "sembleProvider": "Semble - Lokal", + "semblePathLabel": "Path Semble", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Path ke executable semble. Biarkan sebagai \"semble\" jika sudah ada di PATH Anda, atau berikan path lengkap (mis. \"/usr/local/bin/semble\").", + "sembleInstallTitle": "Instal semble", + "sembleInstallDescription": "Semble harus diinstal melalui pip sebelum digunakan:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Membutuhkan Python 3.10+. Pastikan binary semble yang terinstal ada di PATH Anda, atau tentukan path lengkap di atas.", + "sembleDescription": "Pencarian hybrid lokal — tidak memerlukan kunci API." }, "autoApprove": { "toggleShortcut": "Anda dapat mengonfigurasi pintasan global untuk pengaturan ini di preferensi IDE Anda.", diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index 806fb44462..dc14bff598 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Interruzione...", "workspaceToggleLabel": "Abilita l'indicizzazione per questo workspace", "workspaceDisabledMessage": "L'indicizzazione è configurata ma non abilitata per questo workspace.", - "autoEnableDefaultLabel": "Abilita automaticamente l'indicizzazione per i nuovi workspace" + "autoEnableDefaultLabel": "Abilita automaticamente l'indicizzazione per i nuovi workspace", + "sembleProvider": "Semble - Locale", + "semblePathLabel": "Percorso Semble", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Percorso dell'eseguibile semble. 
Lascia \"semble\" se è nel tuo PATH, o fornisci il percorso completo (es. \"/usr/local/bin/semble\").", + "sembleInstallTitle": "Installa semble", + "sembleInstallDescription": "Semble deve essere installato tramite pip prima dell'uso:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Richiede Python 3.10+. Assicurati che il binario semble installato sia nel tuo PATH, o specifica il percorso completo sopra.", + "sembleDescription": "Ricerca ibrida locale — nessuna chiave API richiesta." }, "autoApprove": { "toggleShortcut": "Puoi configurare una scorciatoia globale per questa impostazione nelle preferenze del tuo IDE.", diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index b9a0976c42..c1a49fa037 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "停止中...", "workspaceToggleLabel": "このワークスペースのインデックス作成を有効にする", "workspaceDisabledMessage": "インデックス作成は設定済みですが、このワークスペースでは有効になっていません。", - "autoEnableDefaultLabel": "新しいワークスペースのインデックス作成を自動的に有効にする" + "autoEnableDefaultLabel": "新しいワークスペースのインデックス作成を自動的に有効にする", + "sembleProvider": "Semble - ローカル", + "semblePathLabel": "Sembleパス", + "semblePathPlaceholder": "semble", + "semblePathDescription": "semble実行ファイルのパス。PATHにある場合は\"semble\"のままにするか、フルパスを指定してください(例:\"/usr/local/bin/semble\")。", + "sembleInstallTitle": "sembleのインストール", + "sembleInstallDescription": "使用前にpipでSembleをインストールする必要があります:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Python 3.10+が必要です。インストールしたsembleバイナリがPATHにあることを確認するか、上記でフルパスを指定してください。", + "sembleDescription": "ローカルハイブリッド検索 — APIキー不要。" }, "autoApprove": { "toggleShortcut": "IDEの環境設定で、この設定のグローバルショートカットを設定できます。", diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index 3a88c9fbde..c9f9bc072b 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ 
b/webview-ui/src/i18n/locales/ko/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "중지 중...", "workspaceToggleLabel": "이 워크스페이스에 대한 인덱싱 활성화", "workspaceDisabledMessage": "인덱싱이 구성되었지만 이 워크스페이스에서는 활성화되지 않았습니다.", - "autoEnableDefaultLabel": "새 워크스페이스에 대한 인덱싱 자동 활성화" + "autoEnableDefaultLabel": "새 워크스페이스에 대한 인덱싱 자동 활성화", + "sembleProvider": "Semble - 로컬", + "semblePathLabel": "Semble 경로", + "semblePathPlaceholder": "semble", + "semblePathDescription": "semble 실행 파일의 경로. PATH에 있으면 \"semble\"로 두거나, 전체 경로를 제공하세요 (예: \"/usr/local/bin/semble\").", + "sembleInstallTitle": "semble 설치", + "sembleInstallDescription": "사용 전에 pip를 통해 Semble을 설치해야 합니다:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Python 3.10+가 필요합니다. 설치된 semble 바이너리가 PATH에 있는지 확인하거나, 위에서 전체 경로를 지정하세요.", + "sembleDescription": "로컬 하이브리드 검색 — API 키 불필요." }, "autoApprove": { "toggleShortcut": "IDE 환경 설정에서 이 설정에 대한 전역 바로 가기를 구성할 수 있습니다.", diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index a91bb87de5..ab21391a3c 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Stoppen...", "workspaceToggleLabel": "Indexering inschakelen voor deze werkruimte", "workspaceDisabledMessage": "Indexering is geconfigureerd maar niet ingeschakeld voor deze werkruimte.", - "autoEnableDefaultLabel": "Indexering automatisch inschakelen voor nieuwe werkruimtes" + "autoEnableDefaultLabel": "Indexering automatisch inschakelen voor nieuwe werkruimtes", + "sembleProvider": "Semble - Lokaal", + "semblePathLabel": "Semble-pad", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Pad naar het semble-uitvoerbestand. Laat het op \"semble\" als het in uw PATH staat, of geef het volledige pad op (bijv. 
\"/usr/local/bin/semble\").", + "sembleInstallTitle": "Semble installeren", + "sembleInstallDescription": "Semble moet via pip geïnstalleerd worden voor gebruik:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Vereist Python 3.10+. Zorg ervoor dat het geïnstalleerde semble-binair in uw PATH staat, of geef hierboven het volledige pad op.", + "sembleDescription": "Lokaal hybride zoeken — geen API-sleutels vereist." }, "autoApprove": { "toggleShortcut": "U kunt een globale sneltoets voor deze instelling configureren in de voorkeuren van uw IDE.", diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index f3751196d8..25a5605723 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Zatrzymywanie...", "workspaceToggleLabel": "Włącz indeksowanie dla tego workspace'a", "workspaceDisabledMessage": "Indeksowanie jest skonfigurowane, ale nie włączone dla tego workspace'a.", - "autoEnableDefaultLabel": "Automatycznie włączaj indeksowanie dla nowych workspace'ów" + "autoEnableDefaultLabel": "Automatycznie włączaj indeksowanie dla nowych workspace'ów", + "sembleProvider": "Semble - Lokalny", + "semblePathLabel": "Ścieżka Semble", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Ścieżka do pliku wykonywalnego semble. Pozostaw \"semble\" jeśli jest w PATH, lub podaj pełną ścieżkę (np. \"/usr/local/bin/semble\").", + "sembleInstallTitle": "Zainstaluj semble", + "sembleInstallDescription": "Semble musi być zainstalowany przez pip przed użyciem:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Wymaga Python 3.10+. Upewnij się, że zainstalowany plik binarny semble jest w PATH, lub podaj pełną ścieżkę powyżej.", + "sembleDescription": "Lokalne wyszukiwanie hybrydowe — klucze API nie są wymagane." 
}, "autoApprove": { "toggleShortcut": "Możesz skonfigurować globalny skrót dla tego ustawienia w preferencjach swojego IDE.", diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index 05d3557f76..b6b988a1bf 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Parando...", "workspaceToggleLabel": "Ativar indexação para este workspace", "workspaceDisabledMessage": "A indexação está configurada, mas não ativada para este workspace.", - "autoEnableDefaultLabel": "Ativar indexação automaticamente para novos workspaces" + "autoEnableDefaultLabel": "Ativar indexação automaticamente para novos workspaces", + "sembleProvider": "Semble - Local", + "semblePathLabel": "Caminho do Semble", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Caminho para o executável do semble. Deixe como \"semble\" se estiver no seu PATH, ou forneça o caminho completo (ex. \"/usr/local/bin/semble\").", + "sembleInstallTitle": "Instalar semble", + "sembleInstallDescription": "O Semble deve ser instalado via pip antes do uso:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Requer Python 3.10+. Certifique-se de que o binário semble instalado está no seu PATH, ou especifique o caminho completo acima.", + "sembleDescription": "Busca híbrida local — nenhuma chave de API necessária." 
}, "autoApprove": { "toggleShortcut": "Você pode configurar um atalho global para esta configuração nas preferências do seu IDE.", diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 1aedb8a575..aa871c086a 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Остановка...", "workspaceToggleLabel": "Включить индексацию для этого рабочего пространства", "workspaceDisabledMessage": "Индексация настроена, но не включена для этого рабочего пространства.", - "autoEnableDefaultLabel": "Автоматически включать индексацию для новых рабочих пространств" + "autoEnableDefaultLabel": "Автоматически включать индексацию для новых рабочих пространств", + "sembleProvider": "Semble - Локальный", + "semblePathLabel": "Путь к Semble", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Путь к исполняемому файлу semble. Оставьте \"semble\", если он в вашем PATH, или укажите полный путь (напр. \"/usr/local/bin/semble\").", + "sembleInstallTitle": "Установить semble", + "sembleInstallDescription": "Semble необходимо установить через pip перед использованием:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Требуется Python 3.10+. Убедитесь, что установленный бинарник semble находится в вашем PATH, или укажите полный путь выше.", + "sembleDescription": "Локальный гибридный поиск — ключи API не требуются." 
}, "autoApprove": { "toggleShortcut": "Вы можете настроить глобальное сочетание клавиш для этого параметра в настройках вашей IDE.", diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 22bd730488..900c4798d1 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Durduruluyor...", "workspaceToggleLabel": "Bu çalışma alanı için indekslemeyi etkinleştir", "workspaceDisabledMessage": "İndeksleme yapılandırıldı ancak bu çalışma alanı için etkinleştirilmedi.", - "autoEnableDefaultLabel": "Yeni çalışma alanları için indekslemeyi otomatik etkinleştir" + "autoEnableDefaultLabel": "Yeni çalışma alanları için indekslemeyi otomatik etkinleştir", + "sembleProvider": "Semble - Yerel", + "semblePathLabel": "Semble Yolu", + "semblePathPlaceholder": "semble", + "semblePathDescription": "semble yürütülebilir dosyasının yolu. PATH'inizde ise \"semble\" olarak bırakın veya tam yolu girin (örn. \"/usr/local/bin/semble\").", + "sembleInstallTitle": "semble'ı yükle", + "sembleInstallDescription": "Kullanmadan önce Semble pip ile yüklenmelidir:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Python 3.10+ gerektirir. Yüklenen semble ikili dosyasının PATH'inizde olduğundan emin olun veya yukarıda tam yolu belirtin.", + "sembleDescription": "Yerel hibrit arama — API anahtarı gerektirmez." 
}, "autoApprove": { "toggleShortcut": "IDE tercihlerinizde bu ayar için genel bir kısayol yapılandırabilirsiniz.", diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index 0b09ae004b..ea705870b4 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "Đang dừng...", "workspaceToggleLabel": "Bật lập chỉ mục cho không gian làm việc này", "workspaceDisabledMessage": "Lập chỉ mục đã được cấu hình nhưng chưa được bật cho không gian làm việc này.", - "autoEnableDefaultLabel": "Tự động bật lập chỉ mục cho không gian làm việc mới" + "autoEnableDefaultLabel": "Tự động bật lập chỉ mục cho không gian làm việc mới", + "sembleProvider": "Semble - Cục bộ", + "semblePathLabel": "Đường dẫn Semble", + "semblePathPlaceholder": "semble", + "semblePathDescription": "Đường dẫn đến tệp thực thi semble. Để \"semble\" nếu nó có trong PATH của bạn, hoặc cung cấp đường dẫn đầy đủ (ví dụ: \"/usr/local/bin/semble\").", + "sembleInstallTitle": "Cài đặt semble", + "sembleInstallDescription": "Semble phải được cài đặt qua pip trước khi sử dụng:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "Yêu cầu Python 3.10+. Đảm bảo tệp nhị phân semble đã cài đặt nằm trong PATH của bạn, hoặc chỉ định đường dẫn đầy đủ ở trên.", + "sembleDescription": "Tìm kiếm kết hợp cục bộ — không cần khóa API." 
}, "autoApprove": { "toggleShortcut": "Bạn có thể định cấu hình một phím tắt chung cho cài đặt này trong tùy chọn IDE của bạn.", diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index f58ea87f8c..bf1c88c0bd 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -213,7 +213,16 @@ "stoppingButton": "正在停止...", "workspaceToggleLabel": "为此工作区启用索引", "workspaceDisabledMessage": "索引已配置,但尚未为此工作区启用。", - "autoEnableDefaultLabel": "自动为新工作区启用索引" + "autoEnableDefaultLabel": "自动为新工作区启用索引", + "sembleProvider": "Semble - 本地", + "semblePathLabel": "Semble 路径", + "semblePathPlaceholder": "semble", + "semblePathDescription": "semble 可执行文件的路径。如果在您的 PATH 中,保持为 \"semble\",或提供完整路径(例如 \"/usr/local/bin/semble\")。", + "sembleInstallTitle": "安装 semble", + "sembleInstallDescription": "使用前必须通过 pip 安装 Semble:", + "sembleInstallCommand": "pip install semble", + "sembleInstallNote": "需要 Python 3.10+。确保已安装的 semble 二进制文件在您的 PATH 中,或在上方指定完整路径。", + "sembleDescription": "本地混合搜索 - 无需 API 密钥。" }, "autoApprove": { "toggleShortcut": "您可以在 IDE 首选项中为此设置配置全局快捷方式。", diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index e4268280de..0c053425a0 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -223,7 +223,16 @@ "stoppingButton": "正在停止...", "workspaceToggleLabel": "為此工作區啟用索引", "workspaceDisabledMessage": "索引已設定,但尚未為此工作區啟用。", - "autoEnableDefaultLabel": "自動為新工作區啟用索引" + "autoEnableDefaultLabel": "自動為新工作區啟用索引", + "sembleProvider": "Semble - 本機", + "semblePathLabel": "Semble 路徑", + "semblePathPlaceholder": "semble", + "semblePathDescription": "semble 可執行檔的路徑。如果在您的 PATH 中,保持為 \"semble\",或提供完整路徑(例如 \"/usr/local/bin/semble\")。", + "sembleInstallTitle": "安裝 semble", + "sembleInstallDescription": "使用前必須透過 pip 安裝 Semble:", + "sembleInstallCommand": "pip install semble", + 
"sembleInstallNote": "需要 Python 3.10+。確保已安裝的 semble 二進位檔在您的 PATH 中,或在上方指定完整路徑。", + "sembleDescription": "本機混合搜尋 - 無需 API 金鑰。" }, "autoApprove": { "description": "無需詢問許可即可執行下列動作。請僅在您完全信任且了解安全風險的情況下啟用此功能。", From 43c815334fbfcc26ca43c27d767237e942772a24 Mon Sep 17 00:00:00 2001 From: Naved Date: Sat, 30 May 2026 12:02:18 -0700 Subject: [PATCH 03/11] Use built-in semble --- packages/types/src/vscode-extension-host.ts | 6 + src/core/webview/ClineProvider.ts | 2 + .../semble/__tests__/provider.spec.ts | 101 +++-- .../semble/__tests__/semble-cli.spec.ts | 57 +-- .../__tests__/semble-downloader.spec.ts | 353 ++++++++++++++++++ src/services/code-index/semble/index.ts | 15 +- src/services/code-index/semble/provider.ts | 52 ++- src/services/code-index/semble/semble-cli.ts | 88 +---- .../code-index/semble/semble-downloader.ts | 270 ++++++++++++++ src/services/code-index/semble/types.ts | 3 +- .../src/components/chat/CodeIndexPopover.tsx | 52 +-- webview-ui/src/i18n/locales/ca/settings.json | 10 +- webview-ui/src/i18n/locales/de/settings.json | 10 +- webview-ui/src/i18n/locales/en/settings.json | 8 - webview-ui/src/i18n/locales/es/settings.json | 10 +- webview-ui/src/i18n/locales/fr/settings.json | 10 +- webview-ui/src/i18n/locales/hi/settings.json | 10 +- webview-ui/src/i18n/locales/id/settings.json | 10 +- webview-ui/src/i18n/locales/it/settings.json | 10 +- webview-ui/src/i18n/locales/ja/settings.json | 10 +- webview-ui/src/i18n/locales/ko/settings.json | 10 +- webview-ui/src/i18n/locales/nl/settings.json | 10 +- webview-ui/src/i18n/locales/pl/settings.json | 10 +- .../src/i18n/locales/pt-BR/settings.json | 10 +- webview-ui/src/i18n/locales/ru/settings.json | 10 +- webview-ui/src/i18n/locales/tr/settings.json | 10 +- webview-ui/src/i18n/locales/vi/settings.json | 10 +- .../src/i18n/locales/zh-CN/settings.json | 10 +- .../src/i18n/locales/zh-TW/settings.json | 10 +- 29 files changed, 794 insertions(+), 383 deletions(-) create mode 100644
src/services/code-index/semble/__tests__/semble-downloader.spec.ts create mode 100644 src/services/code-index/semble/semble-downloader.ts diff --git a/packages/types/src/vscode-extension-host.ts b/packages/types/src/vscode-extension-host.ts index 92a0c6d4c5..3d5e7a15f4 100644 --- a/packages/types/src/vscode-extension-host.ts +++ b/packages/types/src/vscode-extension-host.ts @@ -375,6 +375,12 @@ export type ExtensionState = Pick< deviceName?: string debug?: boolean + /** + * Platform info for conditional feature support (e.g. semble binary availability). + */ + platform?: string + arch?: string + /** * Monotonically increasing sequence number for clineMessages state pushes. * When present, the frontend should only apply clineMessages from a state push diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 40590056d0..31899756e2 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -2282,6 +2282,8 @@ export class ClineProvider } })(), ...zooCodeState, + platform: process.platform, + arch: process.arch, debug: vscode.workspace.getConfiguration(Package.name).get("debug", false), } } diff --git a/src/services/code-index/semble/__tests__/provider.spec.ts b/src/services/code-index/semble/__tests__/provider.spec.ts index 8039ac5d65..574142afe4 100644 --- a/src/services/code-index/semble/__tests__/provider.spec.ts +++ b/src/services/code-index/semble/__tests__/provider.spec.ts @@ -3,13 +3,21 @@ import { SembleProvider } from "../provider" import { SembleCLI } from "../semble-cli" import { SEMBLE_DEFAULTS } from "../types" -// Mock SembleCLI +// Mock SembleCLI - use a shared mock instance +const sharedMockCli = { + checkInstalled: vi.fn(), + search: vi.fn(), + findRelated: vi.fn(), +} + vi.mock("../semble-cli", () => ({ - SembleCLI: vi.fn().mockImplementation(() => ({ - checkInstalled: vi.fn(), - search: vi.fn(), - findRelated: vi.fn(), - })), + SembleCLI: vi.fn().mockImplementation(() => sharedMockCli), 
+})) + +// Mock semble-downloader +vi.mock("../semble-downloader", () => ({ + isSembleSupportedPlatform: vi.fn().mockReturnValue(true), + downloadSemble: vi.fn().mockResolvedValue("/mock/storage/semble/semble"), })) // Mock TelemetryService @@ -28,6 +36,7 @@ vi.mock("vscode", () => ({ import { TelemetryService } from "@roo-code/telemetry" import { TelemetryEventName } from "@roo-code/types" +import { isSembleSupportedPlatform, downloadSemble } from "../semble-downloader" describe("SembleProvider", () => { let provider: SembleProvider @@ -37,15 +46,19 @@ describe("SembleProvider", () => { beforeEach(() => { vi.clearAllMocks() + ;(isSembleSupportedPlatform as any).mockReturnValue(true) + ;(downloadSemble as any).mockResolvedValue("/mock/storage/semble/semble") mockStateManager = { setSystemState: vi.fn(), } - mockContext = {} + mockContext = { + globalStorageUri: { fsPath: "/mock/storage" }, + } provider = new SembleProvider("/workspace", mockContext, mockStateManager, "semble") - mockCli = (SembleCLI as any).mock.results[0].value + mockCli = sharedMockCli }) describe("constructor", () => { @@ -70,11 +83,12 @@ describe("SembleProvider", () => { }) describe("initialize", () => { - it("should set state to Indexed when semble is installed", async () => { - mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + it("should auto-download and set state to Indexed when semble works", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true }) await provider.initialize() + expect(downloadSemble).toHaveBeenCalledWith("/mock/storage") expect(provider.state).toBe("Indexed") expect(mockStateManager.setSystemState).toHaveBeenCalledWith( "Indexed", @@ -82,10 +96,34 @@ describe("SembleProvider", () => { ) }) - it("should set state to Error when semble is not installed", async () => { + it("should set state to Error when platform is unsupported", async () => { + ;(isSembleSupportedPlatform as any).mockReturnValue(false) + + await 
provider.initialize() + + expect(provider.state).toBe("Error") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Error", + expect.stringContaining("not supported on this platform"), + ) + }) + + it("should set state to Error when download fails", async () => { + ;(downloadSemble as any).mockRejectedValue(new Error("network error")) + + await provider.initialize() + + expect(provider.state).toBe("Error") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Error", + expect.stringContaining("Failed to download semble"), + ) + }) + + it("should set state to Error when semble check fails after download", async () => { mockCli.checkInstalled.mockResolvedValue({ installed: false, - error: "semble: command not found", + error: "binary not functional", }) await provider.initialize() @@ -93,23 +131,33 @@ describe("SembleProvider", () => { expect(provider.state).toBe("Error") expect(mockStateManager.setSystemState).toHaveBeenCalledWith( "Error", - expect.stringContaining("semble: command not found"), + expect.stringContaining("binary not functional"), ) }) it("should not re-initialize if already initialized", async () => { - mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + mockCli.checkInstalled.mockResolvedValue({ installed: true }) await provider.initialize() await provider.initialize() expect(mockCli.checkInstalled).toHaveBeenCalledTimes(1) }) + + it("should skip download when custom semble path is configured", async () => { + const customProvider = new SembleProvider("/workspace", mockContext, mockStateManager, "/custom/semble") + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + + await customProvider.initialize() + + expect(downloadSemble).not.toHaveBeenCalled() + expect(customProvider.state).toBe("Indexed") + }) }) describe("startIndexing", () => { it("should initialize if not already initialized", async () => { - mockCli.checkInstalled.mockResolvedValue({ installed: true, version: 
"unknown" }) + mockCli.checkInstalled.mockResolvedValue({ installed: true }) await provider.startIndexing() @@ -117,10 +165,7 @@ describe("SembleProvider", () => { }) it("should not change state if in Error state", async () => { - mockCli.checkInstalled.mockResolvedValue({ - installed: false, - error: "not found", - }) + ;(isSembleSupportedPlatform as any).mockReturnValue(false) await provider.initialize() await provider.startIndexing() @@ -129,7 +174,7 @@ describe("SembleProvider", () => { }) it("should mark as Indexed when already initialized", async () => { - mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + mockCli.checkInstalled.mockResolvedValue({ installed: true }) await provider.initialize() await provider.startIndexing() @@ -148,7 +193,7 @@ describe("SembleProvider", () => { describe("searchIndex", () => { beforeEach(async () => { - mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + mockCli.checkInstalled.mockResolvedValue({ installed: true }) await provider.initialize() }) @@ -287,14 +332,10 @@ describe("SembleProvider", () => { }) it("should return empty array when in Error state", async () => { + ;(isSembleSupportedPlatform as any).mockReturnValue(false) const errorProvider = new SembleProvider("/workspace", mockContext, mockStateManager) - const errorCli = (SembleCLI as any).mock.results[(SembleCLI as any).mock.results.length - 1].value - errorCli.checkInstalled.mockResolvedValue({ - installed: false, - error: "not found", - }) await errorProvider.initialize() - + ;(isSembleSupportedPlatform as any).mockReturnValue(true) // reset for other tests const results = await errorProvider.searchIndex("test") expect(results).toEqual([]) }) @@ -302,7 +343,7 @@ describe("SembleProvider", () => { describe("clearIndexData", () => { it("should reset state to Standby", async () => { - mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + 
mockCli.checkInstalled.mockResolvedValue({ installed: true }) await provider.initialize() await provider.clearIndexData() @@ -317,7 +358,7 @@ describe("SembleProvider", () => { describe("dispose", () => { it("should reset initialization state", async () => { - mockCli.checkInstalled.mockResolvedValue({ installed: true, version: "unknown" }) + mockCli.checkInstalled.mockResolvedValue({ installed: true }) await provider.initialize() provider.dispose() @@ -327,4 +368,4 @@ describe("SembleProvider", () => { expect(results).toEqual([]) }) }) -}) \ No newline at end of file +}) diff --git a/src/services/code-index/semble/__tests__/semble-cli.spec.ts b/src/services/code-index/semble/__tests__/semble-cli.spec.ts index 767a3e976f..3cddc3e433 100644 --- a/src/services/code-index/semble/__tests__/semble-cli.spec.ts +++ b/src/services/code-index/semble/__tests__/semble-cli.spec.ts @@ -63,52 +63,13 @@ describe("SembleCLI", () => { }) describe("checkInstalled", () => { - it("should return installed: true with version when --help and pip show succeed", async () => { - // First call: semble --help; second call: pip show semble - mockSpawn - .mockReturnValueOnce(createMockProcess("usage: semble ...", "", 0)) - .mockReturnValueOnce(createMockProcess("Name: semble\nVersion: 0.3.1\nSummary: ...", "", 0)) + it("should return installed: true when --help succeeds", async () => { + mockSpawn.mockReturnValueOnce(createMockProcess("usage: semble ...", "", 0)) const result = await cli.checkInstalled() - expect(result).toEqual({ installed: true, version: "0.3.1" }) - expect(mockSpawn).toHaveBeenNthCalledWith(1, "semble", ["--help"], expect.objectContaining({ shell: false })) - expect(mockSpawn).toHaveBeenNthCalledWith(2, "pip", ["show", "semble"], expect.any(Object)) - }) - - it("should fall back to pip3 if pip is not available", async () => { - mockSpawn - .mockReturnValueOnce(createMockProcess("usage: semble ...", "", 0)) // --help - .mockReturnValueOnce(createErrorProcess("spawn ENOENT")) 
// pip fails - .mockReturnValueOnce(createMockProcess("Name: semble\nVersion: 0.3.2\n", "", 0)) // pip3 succeeds - - const result = await cli.checkInstalled() - - expect(result).toEqual({ installed: true, version: "0.3.2" }) - }) - - it("should return installed: true with unknown version when pip cannot find semble", async () => { - mockSpawn - .mockReturnValueOnce(createMockProcess("usage: semble ...", "", 0)) // --help - .mockReturnValueOnce(createMockProcess("", "WARNING: Package(s) not found: semble", 1)) // pip not found - .mockReturnValueOnce(createMockProcess("", "WARNING: Package(s) not found: semble", 1)) // pip3 not found - - const result = await cli.checkInstalled() - - expect(result).toEqual({ installed: true, version: "unknown" }) - }) - - it("should return installed: false when version is below 0.3.0", async () => { - mockSpawn - .mockReturnValueOnce(createMockProcess("usage: semble ...", "", 0)) // --help - .mockReturnValueOnce(createMockProcess("Name: semble\nVersion: 0.2.5\n", "", 0)) // pip show - - const result = await cli.checkInstalled() - - expect(result.installed).toBe(false) - expect(result.error).toContain("0.2.5") - expect(result.error).toContain(">= 0.3.0") - expect(result.error).toContain("pip install --upgrade semble") + expect(result).toEqual({ installed: true }) + expect(mockSpawn).toHaveBeenCalledWith("semble", ["--help"], expect.objectContaining({ shell: false })) }) it("should return installed: false when semble --help fails", async () => { @@ -163,11 +124,7 @@ describe("SembleCLI", () => { await cli.search("test", "/repo", { topK: 5 }) - expect(mockSpawn).toHaveBeenCalledWith( - "semble", - ["search", "test", "/repo", "-k", "5"], - expect.any(Object), - ) + expect(mockSpawn).toHaveBeenCalledWith("semble", ["search", "test", "/repo", "-k", "5"], expect.any(Object)) }) it("should add --content flag for non-default content types", async () => { @@ -229,9 +186,7 @@ describe("SembleCLI", () => { it("should throw error when semble 
find-related fails", async () => { mockSpawn.mockReturnValue(createMockProcess("", "Error: no chunk found", 1)) - await expect(cli.findRelated("src/auth.ts", 42, "/repo")).rejects.toThrow( - "Semble find-related failed", - ) + await expect(cli.findRelated("src/auth.ts", 42, "/repo")).rejects.toThrow("Semble find-related failed") }) }) diff --git a/src/services/code-index/semble/__tests__/semble-downloader.spec.ts b/src/services/code-index/semble/__tests__/semble-downloader.spec.ts new file mode 100644 index 0000000000..5fab2754e4 --- /dev/null +++ b/src/services/code-index/semble/__tests__/semble-downloader.spec.ts @@ -0,0 +1,353 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import * as fs from "fs/promises" +import * as path from "path" +import { EventEmitter } from "events" + +// Mock fs/promises +vi.mock("fs/promises", () => ({ + mkdir: vi.fn().mockResolvedValue(undefined), + access: vi.fn(), + chmod: vi.fn().mockResolvedValue(undefined), + unlink: vi.fn().mockResolvedValue(undefined), + rm: vi.fn().mockResolvedValue(undefined), +})) + +// Mock fs (createWriteStream) +const mockWriteStream = { + on: vi.fn(), + close: vi.fn(), +} +vi.mock("fs", () => ({ + createWriteStream: vi.fn(() => mockWriteStream), +})) + +// Mock https +const mockRequest = new EventEmitter() as any +mockRequest.setTimeout = vi.fn() + +const mockResponse = new EventEmitter() as any +mockResponse.statusCode = 200 +mockResponse.headers = {} +mockResponse.pipe = vi.fn() +mockResponse.destroy = vi.fn() + +vi.mock("https", () => ({ + get: vi.fn((_url: string, callback: (res: any) => void) => { + setImmediate(() => callback(mockResponse)) + return mockRequest + }), +})) + +// Mock child_process spawn for tar/unzip extraction +const mockExtractProcess = new EventEmitter() as any +mockExtractProcess.stderr = new EventEmitter() + +vi.mock("child_process", () => ({ + spawn: vi.fn(() => { + // Simulate successful extraction + setImmediate(() => mockExtractProcess.emit("close", 0)) 
+ return mockExtractProcess + }), +})) + +import { + isSembleSupportedPlatform, + getSembleSupportedPlatforms, + downloadSemble, + getSembleBinaryPath, +} from "../semble-downloader" +import * as https from "https" +import { spawn } from "child_process" + +describe("semble-downloader", () => { + beforeEach(() => { + vi.clearAllMocks() + // Reset response defaults + mockResponse.statusCode = 200 + mockResponse.headers = {} + mockResponse.pipe = vi.fn() + mockResponse.destroy = vi.fn() + mockWriteStream.on = vi.fn() + mockWriteStream.close = vi.fn() + }) + + describe("isSembleSupportedPlatform", () => { + it("should return true for linux-x64", () => { + expect(isSembleSupportedPlatform("linux", "x64")).toBe(true) + }) + + it("should return true for linux-arm64", () => { + expect(isSembleSupportedPlatform("linux", "arm64")).toBe(true) + }) + + it("should return true for darwin-arm64", () => { + expect(isSembleSupportedPlatform("darwin", "arm64")).toBe(true) + }) + + it("should return true for win32-x64", () => { + expect(isSembleSupportedPlatform("win32", "x64")).toBe(true) + }) + + it("should return false for darwin-x64 (Intel Mac not supported)", () => { + expect(isSembleSupportedPlatform("darwin", "x64")).toBe(false) + }) + + it("should return false for win32-arm64", () => { + expect(isSembleSupportedPlatform("win32", "arm64")).toBe(false) + }) + + it("should return false for freebsd-x64", () => { + expect(isSembleSupportedPlatform("freebsd", "x64")).toBe(false) + }) + + it("should use process.platform and process.arch when no args provided", () => { + const result = isSembleSupportedPlatform() + expect(typeof result).toBe("boolean") + }) + }) + + describe("getSembleSupportedPlatforms", () => { + it("should return all supported platform-arch combinations", () => { + const platforms = getSembleSupportedPlatforms() + + expect(platforms).toContain("linux-x64") + expect(platforms).toContain("linux-arm64") + expect(platforms).toContain("darwin-arm64") + 
expect(platforms).toContain("win32-x64") + expect(platforms).toHaveLength(4) + }) + }) + + describe("downloadSemble", () => { + it("should return undefined on unsupported platform", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "freebsd", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + try { + const result = await downloadSemble("/some/dir") + expect(result).toBeUndefined() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should return existing binary path if already extracted", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "darwin", configurable: true }) + Object.defineProperty(process, "arch", { value: "arm64", configurable: true }) + + // fs.access resolves => file exists + ;(fs.access as any).mockResolvedValue(undefined) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + expect(fs.mkdir).toHaveBeenCalledWith("/storage", { recursive: true }) + expect(fs.chmod).toHaveBeenCalledWith(path.join("/storage", "semble", "semble"), 0o755) + // Should NOT attempt to download + expect(https.get).not.toHaveBeenCalled() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should download and extract archive when not present", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const 
originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // fs.access rejects => file not present + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download: pipe is called, then "finish" fires + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + expect(https.get).toHaveBeenCalledWith( + expect.stringContaining("semble-linux-x64-fast.tar.gz"), + expect.any(Function), + ) + // Should call tar for extraction + expect(spawn).toHaveBeenCalledWith( + "tar", + [ + "-xzf", + path.join("/storage", "semble-linux-x64-fast.tar.gz"), + "-C", + path.join("/storage", "semble"), + ], + expect.any(Object), + ) + expect(fs.chmod).toHaveBeenCalledWith(path.join("/storage", "semble", "semble"), 0o755) + // Archive should be cleaned up + expect(fs.unlink).toHaveBeenCalledWith(path.join("/storage", "semble-linux-x64-fast.tar.gz")) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should not chmod on windows", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "win32", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // fs.access resolves => file exists + ;(fs.access as any).mockResolvedValue(undefined) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", 
"semble", "semble.exe")) + expect(fs.chmod).not.toHaveBeenCalled() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should throw and clean up on download failure", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "arm64", configurable: true }) + + // fs.access rejects => file not present + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate HTTP error response + mockResponse.statusCode = 404 + + try { + await expect(downloadSemble("/storage")).rejects.toThrow("Failed to download semble") + expect(fs.unlink).toHaveBeenCalledWith(path.join("/storage", "semble-linux-arm64-fast.tar.gz")) + expect(fs.rm).toHaveBeenCalledWith(path.join("/storage", "semble"), { recursive: true, force: true }) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should follow redirects", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "darwin", configurable: true }) + Object.defineProperty(process, "arch", { value: "arm64", configurable: true }) + + // fs.access rejects => file not present + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // First call returns a redirect, second call returns 200 + let callCount = 0 + ;(https.get as any).mockImplementation((_url: string, callback: (res: any) => void) => { + callCount++ + const res = new EventEmitter() as any + if 
(callCount === 1) { + res.statusCode = 302 + res.headers = { location: "https://cdn.example.com/semble-macos-arm64-fast.tar.gz" } + res.destroy = vi.fn() + } else { + res.statusCode = 200 + res.headers = {} + res.pipe = vi.fn() + res.destroy = vi.fn() + } + setImmediate(() => callback(res)) + + const req = new EventEmitter() as any + req.setTimeout = vi.fn() + return req + }) + + // Simulate successful download on the second response + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + expect(https.get).toHaveBeenCalledTimes(2) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + }) + + describe("getSembleBinaryPath", () => { + it("should return path when binary exists", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + ;(fs.access as any).mockResolvedValue(undefined) + + try { + const result = await getSembleBinaryPath("/storage") + expect(result).toBe(path.join("/storage", "semble", "semble")) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should return undefined when binary does not exist", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: 
"linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + try { + const result = await getSembleBinaryPath("/storage") + expect(result).toBeUndefined() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should return undefined on unsupported platform", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "freebsd", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + try { + const result = await getSembleBinaryPath("/storage") + expect(result).toBeUndefined() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + }) +}) diff --git a/src/services/code-index/semble/index.ts b/src/services/code-index/semble/index.ts index 4eafe50e69..e63115e076 100644 --- a/src/services/code-index/semble/index.ts +++ b/src/services/code-index/semble/index.ts @@ -1,4 +1,17 @@ export { SembleCLI } from "./semble-cli" export { SembleProvider } from "./provider" -export type { ISembleProvider, SembleSearchResult, SembleChunk, SembleCheckResult, SembleConfig, SembleContentType } from "./types" +export { + isSembleSupportedPlatform, + getSembleSupportedPlatforms, + downloadSemble, + getSembleBinaryPath, +} from "./semble-downloader" +export type { + ISembleProvider, + SembleSearchResult, + SembleChunk, + SembleCheckResult, + SembleConfig, + SembleContentType, +} from "./types" export { SEMBLE_DEFAULTS } from "./types" diff --git a/src/services/code-index/semble/provider.ts 
b/src/services/code-index/semble/provider.ts index 59c3b819a1..5f9b69a490 100644 --- a/src/services/code-index/semble/provider.ts +++ b/src/services/code-index/semble/provider.ts @@ -5,6 +5,7 @@ import { IndexingState } from "../interfaces/manager" import { VectorStoreSearchResult } from "../interfaces/vector-store" import { CodeIndexStateManager } from "../state-manager" import { SembleCLI } from "./semble-cli" +import { downloadSemble, isSembleSupportedPlatform } from "./semble-downloader" import { ISembleProvider, SembleConfig, SembleContentType, SembleSearchResult, SEMBLE_DEFAULTS } from "./types" import { TelemetryService } from "@roo-code/telemetry" import { TelemetryEventName } from "@roo-code/types" @@ -13,29 +14,31 @@ import { TelemetryEventName } from "@roo-code/types" * Orchestrates code search via the semble CLI. * * Semble indexes on-the-fly with each search call — there is no separate - * "indexing" step. The provider simply validates that semble is installed, - * then delegates search queries to `semble search`. + * "indexing" step. The provider automatically downloads the semble binary + * on first use, then delegates search queries to `semble search`. * * When `embedderProvider === "semble"`, the CodeIndexManager delegates * to this provider instead of the ServiceFactory → orchestrator pipeline. 
*/ export class SembleProvider implements ISembleProvider { - private readonly cli: SembleCLI + private cli: SembleCLI private readonly workspacePath: string private readonly config: SembleConfig private readonly stateManager: CodeIndexStateManager + private readonly context: vscode.ExtensionContext private _state: IndexingState = "Standby" private _isInitialized = false constructor( workspacePath: string, - _context: vscode.ExtensionContext, + context: vscode.ExtensionContext, stateManager: CodeIndexStateManager, semblePath: string = SEMBLE_DEFAULTS.DEFAULT_PATH, options?: { topK?: number; content?: SembleContentType }, ) { this.workspacePath = workspacePath + this.context = context this.stateManager = stateManager this.config = { @@ -52,24 +55,49 @@ export class SembleProvider implements ISembleProvider { } /** - * Initializes the provider: checks semble is installed. + * Initializes the provider: downloads semble if needed, then validates it works. */ async initialize(): Promise { if (this._isInitialized) { return } - // Check if semble is installed - const checkResult = await this.cli.checkInstalled() - - if (!checkResult.installed) { - const errorMsg = checkResult.error || "Semble is not installed" + // Check platform support + if (!isSembleSupportedPlatform()) { this._state = "Error" this.stateManager.setSystemState( "Error", - `Semble not found. Install with 'pip install semble' or set the semble path in settings. 
Error: ${errorMsg}`, + `Semble is not supported on this platform (${process.platform}-${process.arch}).`, ) - console.error("[SembleProvider] Semble not found:", errorMsg) + console.error(`[SembleProvider] Unsupported platform: ${process.platform}-${process.arch}`) + return + } + + // Auto-download semble if no custom path is configured + if (this.config.semblePath === SEMBLE_DEFAULTS.DEFAULT_PATH) { + try { + this.stateManager.setSystemState("Indexing", "Downloading semble binary...") + const storageDir = this.context.globalStorageUri.fsPath + const binaryPath = await downloadSemble(storageDir) + if (binaryPath) { + this.cli = new SembleCLI(binaryPath) + } + } catch (error: any) { + this._state = "Error" + this.stateManager.setSystemState("Error", `Failed to download semble: ${error?.message || error}`) + console.error("[SembleProvider] Download failed:", error?.message || error) + return + } + } + + // Verify the binary works + const checkResult = await this.cli.checkInstalled() + + if (!checkResult.installed) { + const errorMsg = checkResult.error || "Semble binary is not functional" + this._state = "Error" + this.stateManager.setSystemState("Error", `Semble check failed: ${errorMsg}`) + console.error("[SembleProvider] Semble check failed:", errorMsg) return } diff --git a/src/services/code-index/semble/semble-cli.ts b/src/services/code-index/semble/semble-cli.ts index 2e9e85af65..a5fcbf164a 100644 --- a/src/services/code-index/semble/semble-cli.ts +++ b/src/services/code-index/semble/semble-cli.ts @@ -5,8 +5,8 @@ import { SembleSearchResult, SembleCheckResult, SembleContentType, SEMBLE_DEFAUL /** * Wraps the `semble` CLI for programmatic access. * - * Semble must be installed via pip: `pip install semble` - * The semblePath should be a direct path to the executable (e.g. "semble" or "/usr/local/bin/semble"). + * The semble binary is automatically downloaded on enablement via semble-downloader.ts. + * The semblePath should be a direct path to the executable. 
* * All methods spawn the semble process via child_process.spawn with array * arguments (no shell) to prevent shell injection. @@ -29,45 +29,18 @@ export class SembleCLI { } /** - * Checks whether semble is installed and meets the minimum version requirement (0.3.0). - * - * - Confirms the executable runs via `semble --help`. - * - Queries `pip show semble` (falling back to `pip3`) to get the installed version - * and validates >= 0.3.0. + * Checks whether the semble binary is functional by running `semble --help`. */ async checkInstalled(): Promise { - // 1. Confirm the executable is runnable try { await this._spawn(["--help"], { timeout: 10_000 }) + return { installed: true } } catch (error: any) { return { installed: false, error: error?.stderr?.trim() || error?.message || "Failed to run semble", } } - - // 2. Query pip for the installed semble version - const version = await this._getPipVersion() - if (!version) { - // pip couldn't find it — semble may be installed outside pip, allow it - return { installed: true, version: "unknown" } - } - - // 3. Validate >= 0.3.0 - const match = version.match(/^(\d+)\.(\d+)\.(\d+)/) - if (!match) { - return { installed: true, version: "unknown" } - } - - const [major, minor] = [Number(match[1]), Number(match[2])] - if (major === 0 && minor < 3) { - return { - installed: false, - error: `Semble version ${version} is not supported. Please upgrade to semble >= 0.3.0 (run: pip install --upgrade semble).`, - } - } - - return { installed: true, version } } /** @@ -123,26 +96,6 @@ export class SembleCLI { } } - /** - * Queries `pip show semble` (falling back to `pip3`) and returns the version string, - * or `undefined` if semble is not found in pip or pip is unavailable. 
- */ - private async _getPipVersion(): Promise { - for (const pipCmd of ["pip", "pip3"]) { - try { - const stdout = await this._spawnExternal(pipCmd, ["show", "semble"], { timeout: 10_000 }) - // pip show outputs lines like "Version: 0.3.1" - const match = stdout.match(/^Version:\s*(.+)$/m) - if (match) { - return match[1].trim() - } - } catch { - // try next - } - } - return undefined - } - /** * Spawns the semble process and collects stdout/stderr. * Uses spawn without shell — args are passed as an array, no injection risk. @@ -181,39 +134,6 @@ export class SembleCLI { }) } - /** - * Spawns an arbitrary external command (not the semble executable) and returns stdout. - */ - private _spawnExternal(cmd: string, args: string[], options: { timeout: number }): Promise { - return new Promise((resolve, reject) => { - const child = spawn(cmd, args, { - shell: false, - timeout: options.timeout, - stdio: ["ignore", "pipe", "pipe"], - } as any) - - let stdout = "" - let stderr = "" - - child.stdout?.on("data", (data: Buffer) => { - stdout += data.toString() - }) - child.stderr?.on("data", (data: Buffer) => { - stderr += data.toString() - }) - child.on("error", (err: Error) => { - reject({ message: err.message, stderr }) - }) - child.on("close", (code: number | null) => { - if (code === 0) { - resolve(stdout) - } else { - reject({ message: `Process exited with code ${code}`, stderr }) - } - }) - }) - } - /** * Parses semble CLI JSON output into structured results. 
* diff --git a/src/services/code-index/semble/semble-downloader.ts b/src/services/code-index/semble/semble-downloader.ts new file mode 100644 index 0000000000..bd064e04a8 --- /dev/null +++ b/src/services/code-index/semble/semble-downloader.ts @@ -0,0 +1,270 @@ +import * as fs from "fs/promises" +import * as path from "path" +import * as https from "https" +import { createWriteStream } from "fs" +import { spawn } from "child_process" + +/** + * Supported platform/arch combinations for the semble standalone executable. + * Maps to archive names at https://github.com/navedmerchant/sembleexec/releases + * + * Uses "fast-start" archives (one-dir builds) for ~20x faster startup + * compared to single-file binaries. + */ +const SEMBLE_ARCHIVES: Record = { + "linux-x64": { archive: "semble-linux-x64-fast.tar.gz", binary: "semble" }, + "linux-arm64": { archive: "semble-linux-arm64-fast.tar.gz", binary: "semble" }, + "darwin-arm64": { archive: "semble-macos-arm64-fast.tar.gz", binary: "semble" }, + "win32-x64": { archive: "semble-windows-x64-fast.zip", binary: "semble.exe" }, +} + +const SEMBLE_VERSION = "v0.3.1" +const DOWNLOAD_BASE_URL = `https://github.com/navedmerchant/sembleexec/releases/download/${SEMBLE_VERSION}` + +/** + * Returns whether the current platform/arch has a prebuilt semble binary available. + */ +export function isSembleSupportedPlatform(platform?: string, arch?: string): boolean { + const p = platform ?? process.platform + const a = arch ?? process.arch + return `${p}-${a}` in SEMBLE_ARCHIVES +} + +/** + * Returns the list of supported platform-arch keys (e.g. "linux-x64", "darwin-arm64"). + */ +export function getSembleSupportedPlatforms(): string[] { + return Object.keys(SEMBLE_ARCHIVES) +} + +/** + * Returns the archive info for the given platform/arch, or undefined if unsupported. + */ +function getArchiveInfo(platform?: string, arch?: string): { archive: string; binary: string } | undefined { + const p = platform ?? 
process.platform + const a = arch ?? process.arch + return SEMBLE_ARCHIVES[`${p}-${a}`] +} + +/** + * Downloads and extracts the semble archive for the current platform. + * + * The archive is extracted into `storageDir/semble/` and the binary path + * is `storageDir/semble/`. + * + * @param storageDir - Directory to store the extracted binary (e.g. globalStorageUri.fsPath) + * @returns The full path to the semble executable, or undefined if the platform is unsupported. + */ +export async function downloadSemble(storageDir: string): Promise { + const info = getArchiveInfo() + if (!info) { + return undefined + } + + // Ensure storage directory exists + await fs.mkdir(storageDir, { recursive: true }) + + const extractDir = path.join(storageDir, "semble") + const binaryPath = path.join(extractDir, info.binary) + + // Check if already downloaded and extracted + try { + await fs.access(binaryPath) + // Binary exists, make sure it's executable + if (process.platform !== "win32") { + await fs.chmod(binaryPath, 0o755) + } + return binaryPath + } catch { + // Not present, download and extract it + } + + const url = `${DOWNLOAD_BASE_URL}/${info.archive}` + const archivePath = path.join(storageDir, info.archive) + console.log(`[SembleDownloader] Downloading semble from ${url}`) + + try { + await downloadFile(url, archivePath) + + // Extract the archive + await fs.mkdir(extractDir, { recursive: true }) + + if (info.archive.endsWith(".tar.gz")) { + await extractTarGz(archivePath, extractDir) + } else if (info.archive.endsWith(".zip")) { + await extractZip(archivePath, extractDir) + } + + // Make binary executable on unix platforms + if (process.platform !== "win32") { + await fs.chmod(binaryPath, 0o755) + } + + // Clean up the archive file + try { + await fs.unlink(archivePath) + } catch { + // ignore cleanup errors + } + + console.log(`[SembleDownloader] Successfully extracted semble to ${binaryPath}`) + return binaryPath + } catch (error: any) { + // Clean up partial 
download/extraction + try { + await fs.unlink(archivePath) + } catch { + // ignore cleanup errors + } + try { + await fs.rm(extractDir, { recursive: true, force: true }) + } catch { + // ignore cleanup errors + } + console.error(`[SembleDownloader] Failed to download semble: ${error?.message || error}`) + throw new Error(`Failed to download semble: ${error?.message || error}`) + } +} + +/** + * Returns the path to the semble binary if it's already been downloaded, or undefined. + */ +export async function getSembleBinaryPath(storageDir: string): Promise { + const info = getArchiveInfo() + if (!info) { + return undefined + } + + const binaryPath = path.join(storageDir, "semble", info.binary) + + try { + await fs.access(binaryPath) + return binaryPath + } catch { + return undefined + } +} + +/** + * Extracts a .tar.gz archive into the destination directory using the system `tar` command. + */ +function extractTarGz(archivePath: string, destDir: string): Promise { + return new Promise((resolve, reject) => { + const child = spawn("tar", ["-xzf", archivePath, "-C", destDir], { + shell: false, + stdio: ["ignore", "pipe", "pipe"], + }) + + let stderr = "" + child.stderr?.on("data", (data: Buffer) => { + stderr += data.toString() + }) + + child.on("error", (err) => reject(err)) + child.on("close", (code) => { + if (code === 0) { + resolve() + } else { + reject(new Error(`tar extraction failed (code ${code}): ${stderr.trim()}`)) + } + }) + }) +} + +/** + * Extracts a .zip archive into the destination directory. + * Uses PowerShell on Windows, unzip on other platforms. 
+ */ +function extractZip(archivePath: string, destDir: string): Promise { + return new Promise((resolve, reject) => { + let child + + if (process.platform === "win32") { + child = spawn( + "powershell", + [ + "-NoProfile", + "-Command", + `Expand-Archive -Path '${archivePath}' -DestinationPath '${destDir}' -Force`, + ], + { shell: false, stdio: ["ignore", "pipe", "pipe"] }, + ) + } else { + child = spawn("unzip", ["-o", archivePath, "-d", destDir], { + shell: false, + stdio: ["ignore", "pipe", "pipe"], + }) + } + + let stderr = "" + child.stderr?.on("data", (data: Buffer) => { + stderr += data.toString() + }) + + child.on("error", (err) => reject(err)) + child.on("close", (code) => { + if (code === 0) { + resolve() + } else { + reject(new Error(`zip extraction failed (code ${code}): ${stderr.trim()}`)) + } + }) + }) +} + +/** + * Downloads a file from the given URL to the destination path. + * Follows redirects (GitHub releases use 302 redirects to CDN). + */ +function downloadFile(url: string, destPath: string, maxRedirects = 5): Promise { + return new Promise((resolve, reject) => { + if (maxRedirects <= 0) { + reject(new Error("Too many redirects")) + return + } + + const request = https.get(url, (response) => { + // Follow redirects + if ( + response.statusCode && + response.statusCode >= 300 && + response.statusCode < 400 && + response.headers.location + ) { + response.destroy() + downloadFile(response.headers.location, destPath, maxRedirects - 1) + .then(resolve) + .catch(reject) + return + } + + if (response.statusCode !== 200) { + response.destroy() + reject(new Error(`HTTP ${response.statusCode}: Failed to download ${url}`)) + return + } + + const file = createWriteStream(destPath) + response.pipe(file) + + file.on("finish", () => { + file.close() + resolve() + }) + + file.on("error", (err) => { + file.close() + reject(err) + }) + }) + + request.on("error", reject) + request.on("timeout", () => { + request.destroy() + reject(new Error("Download timed 
out")) + }) + + // 2 minute timeout for download + request.setTimeout(120_000) + }) +} diff --git a/src/services/code-index/semble/types.ts b/src/services/code-index/semble/types.ts index 3657e4e60d..d41263fba8 100644 --- a/src/services/code-index/semble/types.ts +++ b/src/services/code-index/semble/types.ts @@ -30,11 +30,10 @@ export interface SembleSearchResult { } /** - * Result from checking if semble is installed. + * Result from checking if semble is functional. */ export interface SembleCheckResult { installed: boolean - version?: string error?: string } diff --git a/webview-ui/src/components/chat/CodeIndexPopover.tsx b/webview-ui/src/components/chat/CodeIndexPopover.tsx index dc9d5e7d39..97e2803a1e 100644 --- a/webview-ui/src/components/chat/CodeIndexPopover.tsx +++ b/webview-ui/src/components/chat/CodeIndexPopover.tsx @@ -195,7 +195,7 @@ export const CodeIndexPopover: React.FC = ({ }) => { const SECRET_PLACEHOLDER = "••••••••••••••••" const { t } = useAppTranslation() - const { codebaseIndexConfig, codebaseIndexModels, cwd, apiConfiguration } = useExtensionState() + const { codebaseIndexConfig, codebaseIndexModels, cwd, apiConfiguration, platform, arch } = useExtensionState() const [open, setOpen] = useState(false) const [isAdvancedSettingsOpen, setIsAdvancedSettingsOpen] = useState(false) const [isSetupSettingsOpen, setIsSetupSettingsOpen] = useState(false) @@ -205,6 +205,10 @@ export const CodeIndexPopover: React.FC = ({ const [saveStatus, setSaveStatus] = useState<"idle" | "saving" | "saved" | "error">("idle") const [saveError, setSaveError] = useState(null) + // Check if semble is supported on the current platform + const SEMBLE_SUPPORTED_PLATFORMS = ["linux-x64", "linux-arm64", "darwin-arm64", "win32-x64"] + const isSembleSupported = platform && arch ? 
SEMBLE_SUPPORTED_PLATFORMS.includes(`${platform}-${arch}`) : false + // Form validation state const [formErrors, setFormErrors] = useState>({}) @@ -771,9 +775,11 @@ export const CodeIndexPopover: React.FC = ({ {t("settings:codeIndex.openRouterProvider")} - - {t("settings:codeIndex.sembleProvider")} - + {isSembleSupported && ( + + {t("settings:codeIndex.sembleProvider")} + + )} @@ -1443,44 +1449,6 @@ export const CodeIndexPopover: React.FC = ({ )} - {currentSettings.codebaseIndexEmbedderProvider === "semble" && ( - <> -
-

- {t("settings:codeIndex.sembleInstallTitle")} -

-

- {t("settings:codeIndex.sembleInstallDescription")} -

- - {t("settings:codeIndex.sembleInstallCommand")} - -

- {t("settings:codeIndex.sembleInstallNote")} -

-
-
- - - updateSetting("codebaseIndexSemblePath", e.target.value) - } - placeholder={t("settings:codeIndex.semblePathPlaceholder")} - className="w-full" - /> -

- {t("settings:codeIndex.semblePathDescription")} -

-
-

- {t("settings:codeIndex.sembleDescription")} -

- - )} - {/* Qdrant Settings — hidden for semble */} {currentSettings.codebaseIndexEmbedderProvider !== "semble" && ( <> diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index 3840db1c22..b757f0c84c 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Activar la indexació per a aquest espai de treball", "workspaceDisabledMessage": "La indexació està configurada però no habilitada per a aquest espai de treball.", "autoEnableDefaultLabel": "Habilitar automàticament la indexació per a nous espais de treball", - "sembleProvider": "Semble - Local", - "semblePathLabel": "Ruta de Semble", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Ruta a l'executable de semble. Deixeu-ho com a \"semble\" si està al vostre PATH, o proporcioneu la ruta completa (p. ex. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Instal·lar semble", - "sembleInstallDescription": "Semble s'ha d'instal·lar via pip abans d'usar-lo:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Requereix Python 3.10+. Assegureu-vos que el binari semble instal·lat estigui al vostre PATH, o especifiqueu la ruta completa a dalt.", - "sembleDescription": "Cerca híbrida local — no calen claus d'API." 
+ "sembleProvider": "Semble - Local" }, "autoApprove": { "toggleShortcut": "Pots configurar una drecera global per a aquesta configuració a les preferències del teu IDE.", diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index cce2f51697..5d8b074a8f 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Indexierung für diesen Arbeitsbereich aktivieren", "workspaceDisabledMessage": "Indexierung ist konfiguriert, aber nicht für diesen Arbeitsbereich aktiviert.", "autoEnableDefaultLabel": "Indexierung für neue Arbeitsbereiche automatisch aktivieren", - "sembleProvider": "Semble - Lokal", - "semblePathLabel": "Semble-Pfad", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Pfad zur semble-Ausführungsdatei. Belasse es bei \"semble\", wenn es in deinem PATH ist, oder gib den vollständigen Pfad an (z.B. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Semble installieren", - "sembleInstallDescription": "Semble muss vor der Verwendung über pip installiert werden:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Erfordert Python 3.10+. Stelle sicher, dass die installierte semble-Binärdatei in deinem PATH ist, oder gib oben den vollständigen Pfad an.", - "sembleDescription": "Lokale hybride Suche — keine API-Schlüssel erforderlich." 
+ "sembleProvider": "Semble - Lokal" }, "autoApprove": { "toggleShortcut": "Du kannst in deinen IDE-Einstellungen einen globalen Shortcut für diese Einstellung konfigurieren.", diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index e3c9f089b1..1915fa9203 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -199,14 +199,6 @@ "openRouterProviderRoutingLabel": "OpenRouter Provider Routing", "openRouterProviderRoutingDescription": "OpenRouter routes requests to the best available providers for your embedding model. By default, requests are load balanced across the top providers to maximize uptime. However, you can choose a specific provider to use for this model.", "sembleProvider": "Semble - Local", - "semblePathLabel": "Semble Path", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Path to the semble executable. Leave as \"semble\" if it's on your PATH, or provide the full path (e.g. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Install semble", - "sembleInstallDescription": "Semble must be installed via pip before use:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Requires Python 3.10+. 
Ensure the installed semble binary is on your PATH, or specify the full path above.", - "sembleDescription": "Local hybrid search — no API keys required.", "openaiCompatibleProvider": "OpenAI Compatible", "openAiKeyLabel": "OpenAI API Key", "openAiKeyPlaceholder": "Enter your OpenAI API key", diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index 540dd4ee08..00ecd99d9d 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Activar indexación para este espacio de trabajo", "workspaceDisabledMessage": "La indexación está configurada pero no habilitada para este espacio de trabajo.", "autoEnableDefaultLabel": "Habilitar automáticamente la indexación para nuevos espacios de trabajo", - "sembleProvider": "Semble - Local", - "semblePathLabel": "Ruta de Semble", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Ruta al ejecutable de semble. Déjelo como \"semble\" si está en su PATH, o proporcione la ruta completa (ej. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Instalar semble", - "sembleInstallDescription": "Semble debe instalarse vía pip antes de usarlo:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Requiere Python 3.10+. Asegúrese de que el binario semble instalado esté en su PATH, o especifique la ruta completa arriba.", - "sembleDescription": "Búsqueda híbrida local — no se requieren claves de API." 
+ "sembleProvider": "Semble - Local" }, "autoApprove": { "toggleShortcut": "Puedes configurar un atajo global para esta configuración en las preferencias de tu IDE.", diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 33d656baeb..d8e96f6209 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Activer l'indexation pour cet espace de travail", "workspaceDisabledMessage": "L'indexation est configurée mais non activée pour cet espace de travail.", "autoEnableDefaultLabel": "Activer automatiquement l'indexation pour les nouveaux espaces de travail", - "sembleProvider": "Semble - Local", - "semblePathLabel": "Chemin de Semble", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Chemin vers l'exécutable semble. Laissez \"semble\" s'il est dans votre PATH, ou fournissez le chemin complet (ex. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Installer semble", - "sembleInstallDescription": "Semble doit être installé via pip avant utilisation :", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Nécessite Python 3.10+. Assurez-vous que le binaire semble installé est dans votre PATH, ou spécifiez le chemin complet ci-dessus.", - "sembleDescription": "Recherche hybride locale — aucune clé API requise." 
+ "sembleProvider": "Semble - Local" }, "autoApprove": { "toggleShortcut": "Vous pouvez configurer un raccourci global pour ce paramètre dans les préférences de votre IDE.", diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index 3b3f9d8254..a67da09743 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "इस वर्कस्पेस के लिए इंडेक्सिंग सक्षम करें", "workspaceDisabledMessage": "इंडेक्सिंग कॉन्फ़िगर की गई है लेकिन इस वर्कस्पेस के लिए सक्षम नहीं है।", "autoEnableDefaultLabel": "नए वर्कस्पेस के लिए स्वचालित रूप से इंडेक्सिंग सक्षम करें", - "sembleProvider": "Semble - स्थानीय", - "semblePathLabel": "Semble पथ", - "semblePathPlaceholder": "semble", - "semblePathDescription": "semble निष्पादन योग्य का पथ। यदि यह आपके PATH में है तो \"semble\" रहने दें, या पूरा पथ प्रदान करें (जैसे \"/usr/local/bin/semble\")।", - "sembleInstallTitle": "semble इंस्टॉल करें", - "sembleInstallDescription": "उपयोग से पहले Semble को pip के माध्यम से इंस्टॉल किया जाना चाहिए:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Python 3.10+ आवश्यक है। सुनिश्चित करें कि इंस्टॉल किया गया semble बाइनरी आपके PATH पर है, या ऊपर पूरा पथ निर्दिष्ट करें।", - "sembleDescription": "स्थानीय हाइब्रिड खोज — किसी API कुंजी की आवश्यकता नहीं।" + "sembleProvider": "Semble - स्थानीय" }, "autoApprove": { "toggleShortcut": "आप अपनी आईडीई वरीयताओं में इस सेटिंग के लिए एक वैश्विक शॉर्टकट कॉन्फ़िगर कर सकते हैं।", diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json index f57155283b..ab2b82e09c 100644 --- a/webview-ui/src/i18n/locales/id/settings.json +++ b/webview-ui/src/i18n/locales/id/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Aktifkan pengindeksan untuk ruang kerja ini", "workspaceDisabledMessage": "Pengindeksan dikonfigurasi tetapi tidak diaktifkan untuk ruang kerja ini.", 
"autoEnableDefaultLabel": "Aktifkan pengindeksan secara otomatis untuk ruang kerja baru", - "sembleProvider": "Semble - Lokal", - "semblePathLabel": "Path Semble", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Path ke executable semble. Biarkan sebagai \"semble\" jika sudah ada di PATH Anda, atau berikan path lengkap (mis. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Instal semble", - "sembleInstallDescription": "Semble harus diinstal melalui pip sebelum digunakan:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Membutuhkan Python 3.10+. Pastikan binary semble yang terinstal ada di PATH Anda, atau tentukan path lengkap di atas.", - "sembleDescription": "Pencarian hybrid lokal — tidak memerlukan kunci API." + "sembleProvider": "Semble - Lokal" }, "autoApprove": { "toggleShortcut": "Anda dapat mengonfigurasi pintasan global untuk pengaturan ini di preferensi IDE Anda.", diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index dc14bff598..8f8fb28f1a 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Abilita l'indicizzazione per questo workspace", "workspaceDisabledMessage": "L'indicizzazione è configurata ma non abilitata per questo workspace.", "autoEnableDefaultLabel": "Abilita automaticamente l'indicizzazione per i nuovi workspace", - "sembleProvider": "Semble - Locale", - "semblePathLabel": "Percorso Semble", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Percorso dell'eseguibile semble. Lascia \"semble\" se è nel tuo PATH, o fornisci il percorso completo (es. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Installa semble", - "sembleInstallDescription": "Semble deve essere installato tramite pip prima dell'uso:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Richiede Python 3.10+. 
Assicurati che il binario semble installato sia nel tuo PATH, o specifica il percorso completo sopra.", - "sembleDescription": "Ricerca ibrida locale — nessuna chiave API richiesta." + "sembleProvider": "Semble - Locale" }, "autoApprove": { "toggleShortcut": "Puoi configurare una scorciatoia globale per questa impostazione nelle preferenze del tuo IDE.", diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index c1a49fa037..d571220152 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "このワークスペースのインデックス作成を有効にする", "workspaceDisabledMessage": "インデックス作成は設定済みですが、このワークスペースでは有効になっていません。", "autoEnableDefaultLabel": "新しいワークスペースのインデックス作成を自動的に有効にする", - "sembleProvider": "Semble - ローカル", - "semblePathLabel": "Sembleパス", - "semblePathPlaceholder": "semble", - "semblePathDescription": "semble実行ファイルのパス。PATHにある場合は\"semble\"のままにするか、フルパスを指定してください(例:\"/usr/local/bin/semble\")。", - "sembleInstallTitle": "sembleのインストール", - "sembleInstallDescription": "使用前にpipでSembleをインストールする必要があります:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Python 3.10+が必要です。インストールしたsembleバイナリがPATHにあることを確認するか、上記でフルパスを指定してください。", - "sembleDescription": "ローカルハイブリッド検索 — APIキー不要。" + "sembleProvider": "Semble - ローカル" }, "autoApprove": { "toggleShortcut": "IDEの環境設定で、この設定のグローバルショートカットを設定できます。", diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index c9f9bc072b..651165dcb9 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "이 워크스페이스에 대한 인덱싱 활성화", "workspaceDisabledMessage": "인덱싱이 구성되었지만 이 워크스페이스에서는 활성화되지 않았습니다.", "autoEnableDefaultLabel": "새 워크스페이스에 대한 인덱싱 자동 활성화", - "sembleProvider": "Semble - 로컬", - "semblePathLabel": "Semble 경로", - "semblePathPlaceholder": "semble", - 
"semblePathDescription": "semble 실행 파일의 경로. PATH에 있으면 \"semble\"로 두거나, 전체 경로를 제공하세요 (예: \"/usr/local/bin/semble\").", - "sembleInstallTitle": "semble 설치", - "sembleInstallDescription": "사용 전에 pip를 통해 Semble을 설치해야 합니다:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Python 3.10+가 필요합니다. 설치된 semble 바이너리가 PATH에 있는지 확인하거나, 위에서 전체 경로를 지정하세요.", - "sembleDescription": "로컬 하이브리드 검색 — API 키 불필요." + "sembleProvider": "Semble - 로컬" }, "autoApprove": { "toggleShortcut": "IDE 환경 설정에서 이 설정에 대한 전역 바로 가기를 구성할 수 있습니다.", diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index ab21391a3c..f4be7f235c 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Indexering inschakelen voor deze werkruimte", "workspaceDisabledMessage": "Indexering is geconfigureerd maar niet ingeschakeld voor deze werkruimte.", "autoEnableDefaultLabel": "Indexering automatisch inschakelen voor nieuwe werkruimtes", - "sembleProvider": "Semble - Lokaal", - "semblePathLabel": "Semble-pad", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Pad naar het semble-uitvoerbestand. Laat het op \"semble\" als het in uw PATH staat, of geef het volledige pad op (bijv. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Semble installeren", - "sembleInstallDescription": "Semble moet via pip geïnstalleerd worden voor gebruik:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Vereist Python 3.10+. Zorg ervoor dat het geïnstalleerde semble-binair in uw PATH staat, of geef hierboven het volledige pad op.", - "sembleDescription": "Lokaal hybride zoeken — geen API-sleutels vereist." 
+ "sembleProvider": "Semble - Lokaal" }, "autoApprove": { "toggleShortcut": "U kunt een globale sneltoets voor deze instelling configureren in de voorkeuren van uw IDE.", diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index 25a5605723..6c01b4503b 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Włącz indeksowanie dla tego workspace'a", "workspaceDisabledMessage": "Indeksowanie jest skonfigurowane, ale nie włączone dla tego workspace'a.", "autoEnableDefaultLabel": "Automatycznie włączaj indeksowanie dla nowych workspace'ów", - "sembleProvider": "Semble - Lokalny", - "semblePathLabel": "Ścieżka Semble", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Ścieżka do pliku wykonywalnego semble. Pozostaw \"semble\" jeśli jest w PATH, lub podaj pełną ścieżkę (np. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Zainstaluj semble", - "sembleInstallDescription": "Semble musi być zainstalowany przez pip przed użyciem:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Wymaga Python 3.10+. Upewnij się, że zainstalowany plik binarny semble jest w PATH, lub podaj pełną ścieżkę powyżej.", - "sembleDescription": "Lokalne wyszukiwanie hybrydowe — klucze API nie są wymagane." 
+ "sembleProvider": "Semble - Lokalny" }, "autoApprove": { "toggleShortcut": "Możesz skonfigurować globalny skrót dla tego ustawienia w preferencjach swojego IDE.", diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index b6b988a1bf..50eb0e76af 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Ativar indexação para este workspace", "workspaceDisabledMessage": "A indexação está configurada, mas não ativada para este workspace.", "autoEnableDefaultLabel": "Ativar indexação automaticamente para novos workspaces", - "sembleProvider": "Semble - Local", - "semblePathLabel": "Caminho do Semble", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Caminho para o executável do semble. Deixe como \"semble\" se estiver no seu PATH, ou forneça o caminho completo (ex. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Instalar semble", - "sembleInstallDescription": "O Semble deve ser instalado via pip antes do uso:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Requer Python 3.10+. Certifique-se de que o binário semble instalado está no seu PATH, ou especifique o caminho completo acima.", - "sembleDescription": "Busca híbrida local — nenhuma chave de API necessária." 
+ "sembleProvider": "Semble - Local" }, "autoApprove": { "toggleShortcut": "Você pode configurar um atalho global para esta configuração nas preferências do seu IDE.", diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index aa871c086a..77561899b2 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Включить индексацию для этого рабочего пространства", "workspaceDisabledMessage": "Индексация настроена, но не включена для этого рабочего пространства.", "autoEnableDefaultLabel": "Автоматически включать индексацию для новых рабочих пространств", - "sembleProvider": "Semble - Локальный", - "semblePathLabel": "Путь к Semble", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Путь к исполняемому файлу semble. Оставьте \"semble\", если он в вашем PATH, или укажите полный путь (напр. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Установить semble", - "sembleInstallDescription": "Semble необходимо установить через pip перед использованием:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Требуется Python 3.10+. Убедитесь, что установленный бинарник semble находится в вашем PATH, или укажите полный путь выше.", - "sembleDescription": "Локальный гибридный поиск — ключи API не требуются." 
+ "sembleProvider": "Semble - Локальный" }, "autoApprove": { "toggleShortcut": "Вы можете настроить глобальное сочетание клавиш для этого параметра в настройках вашей IDE.", diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 900c4798d1..679014b320 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Bu çalışma alanı için indekslemeyi etkinleştir", "workspaceDisabledMessage": "İndeksleme yapılandırıldı ancak bu çalışma alanı için etkinleştirilmedi.", "autoEnableDefaultLabel": "Yeni çalışma alanları için indekslemeyi otomatik etkinleştir", - "sembleProvider": "Semble - Yerel", - "semblePathLabel": "Semble Yolu", - "semblePathPlaceholder": "semble", - "semblePathDescription": "semble yürütülebilir dosyasının yolu. PATH'inizde ise \"semble\" olarak bırakın veya tam yolu girin (örn. \"/usr/local/bin/semble\").", - "sembleInstallTitle": "semble'ı yükle", - "sembleInstallDescription": "Kullanmadan önce Semble pip ile yüklenmelidir:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Python 3.10+ gerektirir. Yüklenen semble ikili dosyasının PATH'inizde olduğundan emin olun veya yukarıda tam yolu belirtin.", - "sembleDescription": "Yerel hibrit arama — API anahtarı gerektirmez." 
+ "sembleProvider": "Semble - Yerel" }, "autoApprove": { "toggleShortcut": "IDE tercihlerinizde bu ayar için genel bir kısayol yapılandırabilirsiniz.", diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index ea705870b4..16447edceb 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "Bật lập chỉ mục cho không gian làm việc này", "workspaceDisabledMessage": "Lập chỉ mục đã được cấu hình nhưng chưa được bật cho không gian làm việc này.", "autoEnableDefaultLabel": "Tự động bật lập chỉ mục cho không gian làm việc mới", - "sembleProvider": "Semble - Cục bộ", - "semblePathLabel": "Đường dẫn Semble", - "semblePathPlaceholder": "semble", - "semblePathDescription": "Đường dẫn đến tệp thực thi semble. Để \"semble\" nếu nó có trong PATH của bạn, hoặc cung cấp đường dẫn đầy đủ (ví dụ: \"/usr/local/bin/semble\").", - "sembleInstallTitle": "Cài đặt semble", - "sembleInstallDescription": "Semble phải được cài đặt qua pip trước khi sử dụng:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "Yêu cầu Python 3.10+. Đảm bảo tệp nhị phân semble đã cài đặt nằm trong PATH của bạn, hoặc chỉ định đường dẫn đầy đủ ở trên.", - "sembleDescription": "Tìm kiếm kết hợp cục bộ — không cần khóa API." 
+ "sembleProvider": "Semble - Cục bộ" }, "autoApprove": { "toggleShortcut": "Bạn có thể định cấu hình một phím tắt chung cho cài đặt này trong tùy chọn IDE của bạn.", diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index bf1c88c0bd..5a0826f24d 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -214,15 +214,7 @@ "workspaceToggleLabel": "为此工作区启用索引", "workspaceDisabledMessage": "索引已配置,但尚未为此工作区启用。", "autoEnableDefaultLabel": "自动为新工作区启用索引", - "sembleProvider": "Semble - 本地", - "semblePathLabel": "Semble 路径", - "semblePathPlaceholder": "semble", - "semblePathDescription": "semble 可执行文件的路径。如果在您的 PATH 中,保持为 \"semble\",或提供完整路径(例如 \"/usr/local/bin/semble\")。", - "sembleInstallTitle": "安装 semble", - "sembleInstallDescription": "使用前必须通过 pip 安装 Semble:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "需要 Python 3.10+。确保已安装的 semble 二进制文件在您的 PATH 中,或在上方指定完整路径。", - "sembleDescription": "本地混合搜索 - 无需 API 密钥。" + "sembleProvider": "Semble - 本地" }, "autoApprove": { "toggleShortcut": "您可以在 IDE 首选项中为此设置配置全局快捷方式。", diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index 0c053425a0..82a0b3b3c1 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -224,15 +224,7 @@ "workspaceToggleLabel": "為此工作區啟用索引", "workspaceDisabledMessage": "索引已設定,但尚未為此工作區啟用。", "autoEnableDefaultLabel": "自動為新工作區啟用索引", - "sembleProvider": "Semble - 本機", - "semblePathLabel": "Semble 路徑", - "semblePathPlaceholder": "semble", - "semblePathDescription": "semble 可執行檔的路徑。如果在您的 PATH 中,保持為 \"semble\",或提供完整路徑(例如 \"/usr/local/bin/semble\")。", - "sembleInstallTitle": "安裝 semble", - "sembleInstallDescription": "使用前必須透過 pip 安裝 Semble:", - "sembleInstallCommand": "pip install semble", - "sembleInstallNote": "需要 Python 3.10+。確保已安裝的 semble 二進位檔在您的 PATH 
中,或在上方指定完整路徑。", - "sembleDescription": "本機混合搜尋 - 無需 API 金鑰。" + "sembleProvider": "Semble - 本機" }, "autoApprove": { "description": "無需詢問許可即可執行下列動作。請僅在您完全信任且了解安全風險的情況下啟用此功能。", From 9a7666c96ab25a7247e6bc8674cc350d06214662 Mon Sep 17 00:00:00 2001 From: Naved Date: Sat, 30 May 2026 12:12:25 -0700 Subject: [PATCH 04/11] Remove customizable path, only use downloaded semble --- packages/types/src/codebase-index.ts | 2 -- packages/types/src/vscode-extension-host.ts | 1 - src/core/webview/ClineProvider.ts | 2 -- src/core/webview/webviewMessageHandler.ts | 1 - src/services/code-index/config-manager.ts | 20 ----------- src/services/code-index/interfaces/config.ts | 1 - src/services/code-index/manager.ts | 7 +--- .../semble/__tests__/provider.spec.ts | 19 ++-------- .../semble/__tests__/semble-cli.spec.ts | 7 +--- src/services/code-index/semble/provider.ts | 35 ++++++++----------- src/services/code-index/semble/semble-cli.ts | 3 +- src/services/code-index/semble/types.ts | 3 -- .../src/components/chat/CodeIndexPopover.tsx | 4 --- 13 files changed, 20 insertions(+), 85 deletions(-) diff --git a/packages/types/src/codebase-index.ts b/packages/types/src/codebase-index.ts index ff6822d79a..0ce995a402 100644 --- a/packages/types/src/codebase-index.ts +++ b/packages/types/src/codebase-index.ts @@ -51,8 +51,6 @@ export const codebaseIndexConfigSchema = z.object({ codebaseIndexBedrockProfile: z.string().optional(), // OpenRouter specific fields codebaseIndexOpenRouterSpecificProvider: z.string().optional(), - // Semble specific fields - codebaseIndexSemblePath: z.string().optional(), }) export type CodebaseIndexConfig = z.infer diff --git a/packages/types/src/vscode-extension-host.ts b/packages/types/src/vscode-extension-host.ts index 3d5e7a15f4..fc3e9a0d0a 100644 --- a/packages/types/src/vscode-extension-host.ts +++ b/packages/types/src/vscode-extension-host.ts @@ -677,7 +677,6 @@ export interface WebviewMessage { codebaseIndexSearchMaxResults?: number codebaseIndexSearchMinScore?: 
number codebaseIndexOpenRouterSpecificProvider?: string // OpenRouter provider routing - codebaseIndexSemblePath?: string // Path to the semble executable // Secret settings codeIndexOpenAiKey?: string diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 31899756e2..6baad77f90 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -2253,7 +2253,6 @@ export class ClineProvider codebaseIndexBedrockRegion: codebaseIndexConfig?.codebaseIndexBedrockRegion, codebaseIndexBedrockProfile: codebaseIndexConfig?.codebaseIndexBedrockProfile, codebaseIndexOpenRouterSpecificProvider: codebaseIndexConfig?.codebaseIndexOpenRouterSpecificProvider, - codebaseIndexSemblePath: codebaseIndexConfig?.codebaseIndexSemblePath, }, // Phase 1 cloud removal: do not let Cloud-auth MDM enforcement force login-only UI flows. mdmCompliant: undefined, @@ -2458,7 +2457,6 @@ export class ClineProvider codebaseIndexBedrockProfile: stateValues.codebaseIndexConfig?.codebaseIndexBedrockProfile, codebaseIndexOpenRouterSpecificProvider: stateValues.codebaseIndexConfig?.codebaseIndexOpenRouterSpecificProvider, - codebaseIndexSemblePath: stateValues.codebaseIndexConfig?.codebaseIndexSemblePath, }, profileThresholds: stateValues.profileThresholds ?? 
{}, lockApiConfigAcrossModes: this.context.workspaceState.get("lockApiConfigAcrossModes", false), diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index a13595e7d9..429de051b8 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -2500,7 +2500,6 @@ export const webviewMessageHandler = async ( codebaseIndexSearchMaxResults: settings.codebaseIndexSearchMaxResults, codebaseIndexSearchMinScore: settings.codebaseIndexSearchMinScore, codebaseIndexOpenRouterSpecificProvider: settings.codebaseIndexOpenRouterSpecificProvider, - codebaseIndexSemblePath: settings.codebaseIndexSemblePath, } // Save global state first diff --git a/src/services/code-index/config-manager.ts b/src/services/code-index/config-manager.ts index 9984ca4dc4..abac552561 100644 --- a/src/services/code-index/config-manager.ts +++ b/src/services/code-index/config-manager.ts @@ -22,7 +22,6 @@ export class CodeIndexConfigManager { private vercelAiGatewayOptions?: { apiKey: string } private bedrockOptions?: { region: string; profile?: string } private openRouterOptions?: { apiKey: string; specificProvider?: string } - private semblePath?: string private qdrantUrl?: string = "http://localhost:6333" private qdrantApiKey?: string private searchMinScore?: number @@ -151,9 +150,6 @@ export class CodeIndexConfigManager { this.bedrockOptions = bedrockRegion ? { region: bedrockRegion, profile: bedrockProfile || undefined } : undefined - - // Semble path (optional — defaults to "semble" in the SembleProvider) - this.semblePath = codebaseIndexConfig.codebaseIndexSemblePath || undefined } /** @@ -200,7 +196,6 @@ export class CodeIndexConfigManager { openRouterSpecificProvider: this.openRouterOptions?.specificProvider ?? "", qdrantUrl: this.qdrantUrl ?? "", qdrantApiKey: this.qdrantApiKey ?? "", - semblePath: this.semblePath ?? 
"", } // Refresh secrets from VSCode storage to ensure we have the latest values @@ -417,13 +412,6 @@ export class CodeIndexConfigManager { return true } - // Semble path change requires restart to use the new executable - const prevSemblePath = prev?.semblePath ?? "" - const currentSemblePath = this.semblePath ?? "" - if (prevSemblePath !== currentSemblePath) { - return true - } - // Vector dimension changes (still important for compatibility) if (this._hasVectorDimensionChanged(prevProvider, prev?.modelId)) { return true @@ -560,12 +548,4 @@ export class CodeIndexConfigManager { public get currentSearchMaxResults(): number { return this.searchMaxResults ?? DEFAULT_MAX_SEARCH_RESULTS } - - /** - * Gets the configured path to the semble executable. - * Returns undefined if not explicitly configured (provider will use default "semble"). - */ - public get currentSemblePath(): string | undefined { - return this.semblePath - } } diff --git a/src/services/code-index/interfaces/config.ts b/src/services/code-index/interfaces/config.ts index e63f7a5c4d..f52f98aaa0 100644 --- a/src/services/code-index/interfaces/config.ts +++ b/src/services/code-index/interfaces/config.ts @@ -45,5 +45,4 @@ export type PreviousConfigSnapshot = { openRouterSpecificProvider?: string qdrantUrl?: string qdrantApiKey?: string - semblePath?: string } diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index 3146d4f05e..245e8678f5 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -404,12 +404,7 @@ export class CodeIndexManager { // Branch: if provider is "semble", create SembleProvider instead of external services if (this._configManager!.currentEmbedderProvider === "semble") { - this._sembleProvider = new SembleProvider( - this.workspacePath, - this.context, - this._stateManager, - this._configManager!.currentSemblePath, - ) + this._sembleProvider = new SembleProvider(this.workspacePath, this.context, this._stateManager) 
await this._sembleProvider.initialize() return } diff --git a/src/services/code-index/semble/__tests__/provider.spec.ts b/src/services/code-index/semble/__tests__/provider.spec.ts index 574142afe4..3892bc5114 100644 --- a/src/services/code-index/semble/__tests__/provider.spec.ts +++ b/src/services/code-index/semble/__tests__/provider.spec.ts @@ -57,7 +57,7 @@ describe("SembleProvider", () => { globalStorageUri: { fsPath: "/mock/storage" }, } - provider = new SembleProvider("/workspace", mockContext, mockStateManager, "semble") + provider = new SembleProvider("/workspace", mockContext, mockStateManager) mockCli = sharedMockCli }) @@ -68,13 +68,8 @@ describe("SembleProvider", () => { expect(p.state).toBe("Standby") }) - it("should create provider with custom semble path", () => { - const p = new SembleProvider("/workspace", mockContext, mockStateManager, "/usr/local/bin/semble") - expect(p).toBeDefined() - }) - it("should create provider with custom topK and content", () => { - const p = new SembleProvider("/workspace", mockContext, mockStateManager, "semble", { + const p = new SembleProvider("/workspace", mockContext, mockStateManager, { topK: 5, content: "all", }) @@ -143,16 +138,6 @@ describe("SembleProvider", () => { expect(mockCli.checkInstalled).toHaveBeenCalledTimes(1) }) - - it("should skip download when custom semble path is configured", async () => { - const customProvider = new SembleProvider("/workspace", mockContext, mockStateManager, "/custom/semble") - mockCli.checkInstalled.mockResolvedValue({ installed: true }) - - await customProvider.initialize() - - expect(downloadSemble).not.toHaveBeenCalled() - expect(customProvider.state).toBe("Indexed") - }) }) describe("startIndexing", () => { diff --git a/src/services/code-index/semble/__tests__/semble-cli.spec.ts b/src/services/code-index/semble/__tests__/semble-cli.spec.ts index 3cddc3e433..e58826bc9e 100644 --- a/src/services/code-index/semble/__tests__/semble-cli.spec.ts +++ 
b/src/services/code-index/semble/__tests__/semble-cli.spec.ts @@ -51,12 +51,7 @@ describe("SembleCLI", () => { }) describe("constructor", () => { - it("should use default semble path", () => { - const defaultCli = new SembleCLI() - expect(defaultCli).toBeDefined() - }) - - it("should accept a custom path to the semble executable", () => { + it("should accept a path to the semble executable", () => { const customCli = new SembleCLI("/usr/local/bin/semble") expect(customCli).toBeDefined() }) diff --git a/src/services/code-index/semble/provider.ts b/src/services/code-index/semble/provider.ts index 5f9b69a490..6521904746 100644 --- a/src/services/code-index/semble/provider.ts +++ b/src/services/code-index/semble/provider.ts @@ -21,7 +21,7 @@ import { TelemetryEventName } from "@roo-code/types" * to this provider instead of the ServiceFactory → orchestrator pipeline. */ export class SembleProvider implements ISembleProvider { - private cli: SembleCLI + private cli!: SembleCLI private readonly workspacePath: string private readonly config: SembleConfig private readonly stateManager: CodeIndexStateManager @@ -34,7 +34,6 @@ export class SembleProvider implements ISembleProvider { workspacePath: string, context: vscode.ExtensionContext, stateManager: CodeIndexStateManager, - semblePath: string = SEMBLE_DEFAULTS.DEFAULT_PATH, options?: { topK?: number; content?: SembleContentType }, ) { this.workspacePath = workspacePath @@ -42,12 +41,9 @@ export class SembleProvider implements ISembleProvider { this.stateManager = stateManager this.config = { - semblePath, topK: options?.topK ?? SEMBLE_DEFAULTS.DEFAULT_TOP_K, content: options?.content ?? SEMBLE_DEFAULTS.DEFAULT_CONTENT, } - - this.cli = new SembleCLI(semblePath) } get state(): IndexingState { @@ -55,7 +51,7 @@ export class SembleProvider implements ISembleProvider { } /** - * Initializes the provider: downloads semble if needed, then validates it works. 
+ * Initializes the provider: downloads semble, then validates it works. */ async initialize(): Promise { if (this._isInitialized) { @@ -73,21 +69,20 @@ export class SembleProvider implements ISembleProvider { return } - // Auto-download semble if no custom path is configured - if (this.config.semblePath === SEMBLE_DEFAULTS.DEFAULT_PATH) { - try { - this.stateManager.setSystemState("Indexing", "Downloading semble binary...") - const storageDir = this.context.globalStorageUri.fsPath - const binaryPath = await downloadSemble(storageDir) - if (binaryPath) { - this.cli = new SembleCLI(binaryPath) - } - } catch (error: any) { - this._state = "Error" - this.stateManager.setSystemState("Error", `Failed to download semble: ${error?.message || error}`) - console.error("[SembleProvider] Download failed:", error?.message || error) - return + // Download semble binary + try { + this.stateManager.setSystemState("Indexing", "Downloading semble binary...") + const storageDir = this.context.globalStorageUri.fsPath + const binaryPath = await downloadSemble(storageDir) + if (!binaryPath) { + throw new Error("Download returned no path") } + this.cli = new SembleCLI(binaryPath) + } catch (error: any) { + this._state = "Error" + this.stateManager.setSystemState("Error", `Failed to download semble: ${error?.message || error}`) + console.error("[SembleProvider] Download failed:", error?.message || error) + return } // Verify the binary works diff --git a/src/services/code-index/semble/semble-cli.ts b/src/services/code-index/semble/semble-cli.ts index a5fcbf164a..1072748b96 100644 --- a/src/services/code-index/semble/semble-cli.ts +++ b/src/services/code-index/semble/semble-cli.ts @@ -6,7 +6,6 @@ import { SembleSearchResult, SembleCheckResult, SembleContentType, SEMBLE_DEFAUL * Wraps the `semble` CLI for programmatic access. * * The semble binary is automatically downloaded on enablement via semble-downloader.ts. - * The semblePath should be a direct path to the executable. 
* * All methods spawn the semble process via child_process.spawn with array * arguments (no shell) to prevent shell injection. @@ -24,7 +23,7 @@ import { SembleSearchResult, SembleCheckResult, SembleContentType, SEMBLE_DEFAUL export class SembleCLI { private readonly semblePath: string - constructor(semblePath: string = SEMBLE_DEFAULTS.DEFAULT_PATH) { + constructor(semblePath: string) { this.semblePath = semblePath } diff --git a/src/services/code-index/semble/types.ts b/src/services/code-index/semble/types.ts index d41263fba8..4a49c561f6 100644 --- a/src/services/code-index/semble/types.ts +++ b/src/services/code-index/semble/types.ts @@ -41,8 +41,6 @@ export interface SembleCheckResult { * Configuration for the Semble provider. */ export interface SembleConfig { - /** Path to the semble executable. Default: "semble" (uses PATH). */ - semblePath: string /** Maximum search results to return. Default: 10. */ topK: number /** Content types to index. Default: "code". */ @@ -83,7 +81,6 @@ export interface ISembleProvider { * Default configuration values for Semble. 
*/ export const SEMBLE_DEFAULTS = { - DEFAULT_PATH: "semble", DEFAULT_TOP_K: 10, DEFAULT_CONTENT: "code" as SembleContentType, } diff --git a/webview-ui/src/components/chat/CodeIndexPopover.tsx b/webview-ui/src/components/chat/CodeIndexPopover.tsx index 97e2803a1e..d3e1af2b08 100644 --- a/webview-ui/src/components/chat/CodeIndexPopover.tsx +++ b/webview-ui/src/components/chat/CodeIndexPopover.tsx @@ -81,7 +81,6 @@ interface LocalCodeIndexSettings { codebaseIndexVercelAiGatewayApiKey?: string codebaseIndexOpenRouterApiKey?: string codebaseIndexOpenRouterSpecificProvider?: string - codebaseIndexSemblePath?: string } // Validation schema for codebase index settings @@ -181,7 +180,6 @@ const createValidationSchema = (provider: EmbedderProvider, t: any) => { // Semble requires no API keys, Qdrant URL, or model selection return z.object({ codebaseIndexEnabled: z.boolean(), - codebaseIndexSemblePath: z.string().optional(), }) default: @@ -237,7 +235,6 @@ export const CodeIndexPopover: React.FC = ({ codebaseIndexVercelAiGatewayApiKey: "", codebaseIndexOpenRouterApiKey: "", codebaseIndexOpenRouterSpecificProvider: "", - codebaseIndexSemblePath: "", }) // Initial settings state - stores the settings when popover opens @@ -278,7 +275,6 @@ export const CodeIndexPopover: React.FC = ({ codebaseIndexOpenRouterApiKey: "", codebaseIndexOpenRouterSpecificProvider: codebaseIndexConfig.codebaseIndexOpenRouterSpecificProvider || "", - codebaseIndexSemblePath: codebaseIndexConfig.codebaseIndexSemblePath || "", } setInitialSettings(settings) setCurrentSettings(settings) From d12ff7d17d1de19f00391af2c07a032d90d63d92 Mon Sep 17 00:00:00 2001 From: Naved Date: Sat, 30 May 2026 12:43:53 -0700 Subject: [PATCH 05/11] fix test --- .../__tests__/config-manager.spec.ts | 104 ++++++++ .../__tests__/service-factory.spec.ts | 22 ++ .../semble/__tests__/provider.spec.ts | 236 ++++++++++++++++++ .../semble/__tests__/semble-cli.spec.ts | 107 ++++++++ .../__tests__/semble-downloader.spec.ts | 86 +++++++ 
5 files changed, 555 insertions(+) diff --git a/src/services/code-index/__tests__/config-manager.spec.ts b/src/services/code-index/__tests__/config-manager.spec.ts index 27815c0bef..1839eb464f 100644 --- a/src/services/code-index/__tests__/config-manager.spec.ts +++ b/src/services/code-index/__tests__/config-manager.spec.ts @@ -1127,6 +1127,86 @@ describe("CodeIndexConfigManager", () => { expect(requiresRestart).toBe(true) }) }) + + describe("semble provider configuration", () => { + it("should load semble provider configuration", async () => { + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + const result = await configManager.loadConfiguration() + + expect(result.currentConfig.embedderProvider).toBe("semble") + expect(result.currentConfig.isConfigured).toBe(true) + }) + + it("should require restart when switching from openai to semble", async () => { + // Initial state with OpenAI + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderModelId: "text-embedding-3-small", + }) + setupSecretMocks({ + codeIndexOpenAiKey: "test-key", + }) + + await configManager.loadConfiguration() + + // Switch to semble + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + const result = await configManager.loadConfiguration() + expect(result.requiresRestart).toBe(true) + }) + + it("should require restart when switching from semble to openai", async () => { + // Initial state with semble + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + 
await configManager.loadConfiguration() + + // Switch to openai + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderModelId: "text-embedding-3-small", + }) + setupSecretMocks({ + codeIndexOpenAiKey: "test-key", + }) + + const result = await configManager.loadConfiguration() + expect(result.requiresRestart).toBe(true) + }) + + it("should not require restart when semble config stays the same", async () => { + // Initial state with semble + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + await configManager.loadConfiguration() + + // Same semble config again + const result = await configManager.loadConfiguration() + expect(result.requiresRestart).toBe(false) + }) + }) }) describe("isConfigured", () => { @@ -1684,6 +1764,30 @@ describe("CodeIndexConfigManager", () => { expect(configManager.isConfigured()).toBe(false) }) + it("should always return true for semble provider (no API keys or Qdrant needed)", () => { + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + configManager = new CodeIndexConfigManager(mockContextProxy) + expect(configManager.isConfigured()).toBe(true) + }) + + it("should return true for semble even without any other configuration", () => { + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + // No qdrant URL, no API keys + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + configManager = new CodeIndexConfigManager(mockContextProxy) + expect(configManager.isConfigured()).toBe(true) + expect(configManager.isFeatureConfigured).toBe(true) + }) + 
describe("currentModelDimension", () => { beforeEach(() => { vi.clearAllMocks() diff --git a/src/services/code-index/__tests__/service-factory.spec.ts b/src/services/code-index/__tests__/service-factory.spec.ts index 3e943ebd82..b24d5df2fc 100644 --- a/src/services/code-index/__tests__/service-factory.spec.ts +++ b/src/services/code-index/__tests__/service-factory.spec.ts @@ -356,6 +356,17 @@ describe("CodeIndexServiceFactory", () => { // Act & Assert expect(() => factory.createEmbedder()).toThrow("serviceFactory.invalidEmbedderType") }) + + it("should throw when provider is semble (semble handles its own embedding)", () => { + const testConfig = { + embedderProvider: "semble", + } + mockConfigManager.getConfig.mockReturnValue(testConfig as any) + + expect(() => factory.createEmbedder()).toThrow( + "Semble provider handles its own embedding. Do not call createEmbedder() for semble", + ) + }) }) describe("createVectorStore", () => { @@ -678,6 +689,17 @@ describe("CodeIndexServiceFactory", () => { // Act & Assert expect(() => factory.createVectorStore()).toThrow("serviceFactory.qdrantUrlMissing") }) + + it("should throw when provider is semble (semble handles its own vector storage)", () => { + const testConfig = { + embedderProvider: "semble", + } + mockConfigManager.getConfig.mockReturnValue(testConfig as any) + + expect(() => factory.createVectorStore()).toThrow( + "Semble provider handles its own vector storage. 
Do not call createVectorStore() for semble", + ) + }) }) describe("validateEmbedder", () => { diff --git a/src/services/code-index/semble/__tests__/provider.spec.ts b/src/services/code-index/semble/__tests__/provider.spec.ts index 3892bc5114..fab3eed45e 100644 --- a/src/services/code-index/semble/__tests__/provider.spec.ts +++ b/src/services/code-index/semble/__tests__/provider.spec.ts @@ -302,6 +302,28 @@ describe("SembleProvider", () => { }) }) + it("should resolve relative directoryPrefix against workspace path", async () => { + mockCli.search.mockResolvedValue([]) + + await provider.searchIndex("test", "src/subdir") + + expect(mockCli.search).toHaveBeenCalledWith("test", "/workspace/src/subdir", { + topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, + content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, + }) + }) + + it("should use absolute directoryPrefix as-is", async () => { + mockCli.search.mockResolvedValue([]) + + await provider.searchIndex("test", "/absolute/custom/path") + + expect(mockCli.search).toHaveBeenCalledWith("test", "/absolute/custom/path", { + topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, + content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, + }) + }) + it("should return empty array on search error and log telemetry", async () => { mockCli.search.mockRejectedValue(new Error("Search failed")) @@ -353,4 +375,218 @@ describe("SembleProvider", () => { expect(results).toEqual([]) }) }) + + describe("_convertResults edge cases", () => { + beforeEach(async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + await provider.initialize() + }) + + it("should handle results with null content using empty string fallback", async () => { + const mockResults = [ + { + chunk: { + content: null, + file_path: "src/file.ts", + start_line: null, + end_line: null, + language: null, + location: "", + }, + score: 0.6, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + expect(results).toHaveLength(1) + 
expect(results[0].payload?.codeChunk).toBe("") + expect(results[0].payload?.startLine).toBe(0) + expect(results[0].payload?.endLine).toBe(0) + }) + + it("should handle results with undefined content fields", async () => { + const mockResults = [ + { + chunk: { + content: undefined, + file_path: "src/file.ts", + start_line: undefined, + end_line: undefined, + language: undefined, + location: "", + }, + score: 0.5, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + expect(results).toHaveLength(1) + expect(results[0].payload?.codeChunk).toBe("") + expect(results[0].payload?.startLine).toBe(0) + expect(results[0].payload?.endLine).toBe(0) + }) + + it("should normalize backslashes in file paths", async () => { + const mockResults = [ + { + chunk: { + content: "code", + file_path: "src\\nested\\file.ts", + start_line: 1, + end_line: 10, + language: "typescript", + location: "", + }, + score: 0.8, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + expect(results).toHaveLength(1) + expect(results[0].payload?.filePath).not.toContain("\\") + expect(results[0].payload?.filePath).toContain("/") + }) + + it("should join file paths against the searchPath when directoryPrefix is provided", async () => { + const mockResults = [ + { + chunk: { + content: "code", + file_path: "file.ts", + start_line: 1, + end_line: 5, + language: "typescript", + location: "", + }, + score: 0.9, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test", "/custom/path") + + expect(results[0].payload?.filePath).toBe("/custom/path/file.ts") + }) + + it("should assign sequential semble-N IDs to results", async () => { + const mockResults = [ + { + chunk: { + content: "a", + file_path: "a.ts", + start_line: 1, + end_line: 2, + language: "ts", + location: "", + }, + score: 0.9, + }, + { + chunk: { + content: "b", + 
file_path: "b.ts", + start_line: 1, + end_line: 2, + language: "ts", + location: "", + }, + score: 0.8, + }, + { + chunk: { + content: "c", + file_path: "c.ts", + start_line: 1, + end_line: 2, + language: "ts", + location: "", + }, + score: 0.7, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + expect(results[0].id).toBe("semble-0") + expect(results[1].id).toBe("semble-1") + expect(results[2].id).toBe("semble-2") + }) + }) + + describe("initialize error edge cases", () => { + it("should set Error state when download returns no path (undefined)", async () => { + ;(downloadSemble as any).mockResolvedValue(undefined) + + await provider.initialize() + + expect(provider.state).toBe("Error") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Error", + expect.stringContaining("Failed to download semble"), + ) + }) + + it("should set Error state with default message when checkInstalled returns no error string", async () => { + mockCli.checkInstalled.mockResolvedValue({ + installed: false, + error: undefined, + }) + + await provider.initialize() + + expect(provider.state).toBe("Error") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Error", + expect.stringContaining("Semble binary is not functional"), + ) + }) + }) + + describe("custom config options", () => { + it("should pass custom topK to CLI search", async () => { + const customProvider = new SembleProvider("/workspace", mockContext, mockStateManager, { + topK: 5, + }) + + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + await customProvider.initialize() + mockCli.search.mockResolvedValue([]) + + await customProvider.searchIndex("test") + + expect(mockCli.search).toHaveBeenCalledWith("test", "/workspace", { + topK: 5, + content: "code", + }) + }) + + it("should pass custom content type to CLI search", async () => { + const customProvider = new SembleProvider("/workspace", mockContext, mockStateManager, 
{ + content: "all", + }) + + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + await customProvider.initialize() + mockCli.search.mockResolvedValue([]) + + await customProvider.searchIndex("test") + + expect(mockCli.search).toHaveBeenCalledWith("test", "/workspace", { + topK: 10, + content: "all", + }) + }) + }) }) diff --git a/src/services/code-index/semble/__tests__/semble-cli.spec.ts b/src/services/code-index/semble/__tests__/semble-cli.spec.ts index e58826bc9e..5f692969f7 100644 --- a/src/services/code-index/semble/__tests__/semble-cli.spec.ts +++ b/src/services/code-index/semble/__tests__/semble-cli.spec.ts @@ -178,11 +178,67 @@ describe("SembleCLI", () => { ) }) + it("should not add --content flag for code (default)", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "related", results: [] }), "", 0)) + + await cli.findRelated("src/auth.ts", 42, "/repo", { content: "code" }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["find-related", "src/auth.ts", "42", "/repo", "-k", "10"], + expect.any(Object), + ) + }) + + it("should add --content flag for docs content type", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "related", results: [] }), "", 0)) + + await cli.findRelated("src/auth.ts", 42, "/repo", { content: "docs" }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["find-related", "src/auth.ts", "42", "/repo", "-k", "10", "--content", "docs"], + expect.any(Object), + ) + }) + it("should throw error when semble find-related fails", async () => { mockSpawn.mockReturnValue(createMockProcess("", "Error: no chunk found", 1)) await expect(cli.findRelated("src/auth.ts", 42, "/repo")).rejects.toThrow("Semble find-related failed") }) + + it("should throw with message when find-related fails with empty stderr", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "", 1)) + + await expect(cli.findRelated("src/auth.ts", 42, 
"/repo")).rejects.toThrow("Semble find-related failed") + }) + + it("should parse results from find-related", async () => { + const jsonResponse = { + query: "related", + results: [ + { + chunk: { + content: "related code", + file_path: "src/related.ts", + start_line: 1, + end_line: 10, + language: "typescript", + location: "src/related.ts:1-10", + }, + score: 0.85, + }, + ], + } + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify(jsonResponse), "", 0)) + + const results = await cli.findRelated("src/auth.ts", 42, "/repo") + + expect(results).toHaveLength(1) + expect(results[0].chunk.file_path).toBe("src/related.ts") + expect(results[0].score).toBe(0.85) + }) }) describe("_parseOutput (via search)", () => { @@ -268,5 +324,56 @@ describe("SembleCLI", () => { expect(results).toEqual([]) }) + + it("should handle flat array format (older semble format)", async () => { + const flatArray = [ + { + chunk: { + content: "old format result", + file_path: "src/old.ts", + start_line: 1, + end_line: 5, + language: "typescript", + location: "src/old.ts:1-5", + }, + score: 0.7, + }, + ] + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify(flatArray), "", 0)) + + const results = await cli.search("test", "/repo") + + expect(results).toHaveLength(1) + expect(results[0].chunk.file_path).toBe("src/old.ts") + expect(results[0].score).toBe(0.7) + }) + + it("should return empty array for unexpected JSON structure", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ unexpected: "format" }), "", 0)) + + const results = await cli.search("test", "/repo") + + expect(results).toEqual([]) + }) + }) + + describe("search error handling", () => { + it("should include stderr in error message when available", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "Permission denied: /repo", 1)) + + await expect(cli.search("test", "/repo")).rejects.toThrow("Permission denied: /repo") + }) + + it("should fall back to process exit message when stderr 
is empty", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "", 1)) + + await expect(cli.search("test", "/repo")).rejects.toThrow("Semble search failed") + }) + + it("should handle spawn error during search", async () => { + mockSpawn.mockReturnValue(createErrorProcess("EACCES: permission denied")) + + await expect(cli.search("test", "/repo")).rejects.toThrow("EACCES: permission denied") + }) }) }) diff --git a/src/services/code-index/semble/__tests__/semble-downloader.spec.ts b/src/services/code-index/semble/__tests__/semble-downloader.spec.ts index 5fab2754e4..df7dff3882 100644 --- a/src/services/code-index/semble/__tests__/semble-downloader.spec.ts +++ b/src/services/code-index/semble/__tests__/semble-downloader.spec.ts @@ -349,5 +349,91 @@ describe("semble-downloader", () => { if (originalArch) Object.defineProperty(process, "arch", originalArch) } }) + + it("should use correct binary name for windows", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "win32", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + ;(fs.access as any).mockResolvedValue(undefined) + + try { + const result = await getSembleBinaryPath("/storage") + expect(result).toBe(path.join("/storage", "semble", "semble.exe")) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + }) + + describe("downloadSemble - zip extraction on Windows", () => { + it("should use PowerShell Expand-Archive on Windows", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "win32", 
configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // fs.access rejects => file not present, triggering download + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble.exe")) + // Should call PowerShell for zip extraction + expect(spawn).toHaveBeenCalledWith( + "powershell", + expect.arrayContaining(["-NoProfile", "-Command", expect.stringContaining("Expand-Archive")]), + expect.any(Object), + ) + // Should NOT call chmod on windows + expect(fs.chmod).not.toHaveBeenCalled() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + }) + + describe("downloadSemble - error handling edge cases", () => { + it("should not throw when archive cleanup fails after successful extraction", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // fs.access rejects => file not present + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + // Archive cleanup fails but should not throw (only archive removal after extraction) + ;(fs.unlink as any).mockRejectedValue(new Error("unlink cleanup failed")) + + try { + const result = await downloadSemble("/storage") + 
// Should still succeed — archive cleanup failure is ignored + expect(result).toBe(path.join("/storage", "semble", "semble")) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) }) }) From 19f0864a4492f08890fb8d825f0aa2d034cb2120 Mon Sep 17 00:00:00 2001 From: Naved Date: Sat, 30 May 2026 19:12:45 -0700 Subject: [PATCH 06/11] fix windows test --- src/services/code-index/semble/__tests__/provider.spec.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/services/code-index/semble/__tests__/provider.spec.ts b/src/services/code-index/semble/__tests__/provider.spec.ts index fab3eed45e..e72ebdd665 100644 --- a/src/services/code-index/semble/__tests__/provider.spec.ts +++ b/src/services/code-index/semble/__tests__/provider.spec.ts @@ -1,4 +1,5 @@ import { describe, it, expect, vi, beforeEach } from "vitest" +import * as path from "path" import { SembleProvider } from "../provider" import { SembleCLI } from "../semble-cli" import { SEMBLE_DEFAULTS } from "../types" @@ -307,7 +308,8 @@ describe("SembleProvider", () => { await provider.searchIndex("test", "src/subdir") - expect(mockCli.search).toHaveBeenCalledWith("test", "/workspace/src/subdir", { + const expectedPath = path.join("/workspace", "src/subdir") + expect(mockCli.search).toHaveBeenCalledWith("test", expectedPath, { topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, }) From 1632acfee8a915b3d93b6d0b59f8d78ecb297ddc Mon Sep 17 00:00:00 2001 From: Naved Date: Sat, 30 May 2026 20:18:52 -0700 Subject: [PATCH 07/11] make semble binary upgradable --- .../__tests__/semble-downloader.spec.ts | 170 ++++++++++++++++++ src/services/code-index/semble/semble-cli.ts | 3 +- .../code-index/semble/semble-downloader.ts | 66 +++++-- 3 files changed, 226 insertions(+), 13 deletions(-) diff --git 
a/src/services/code-index/semble/__tests__/semble-downloader.spec.ts b/src/services/code-index/semble/__tests__/semble-downloader.spec.ts index df7dff3882..575277f850 100644 --- a/src/services/code-index/semble/__tests__/semble-downloader.spec.ts +++ b/src/services/code-index/semble/__tests__/semble-downloader.spec.ts @@ -10,6 +10,8 @@ vi.mock("fs/promises", () => ({ chmod: vi.fn().mockResolvedValue(undefined), unlink: vi.fn().mockResolvedValue(undefined), rm: vi.fn().mockResolvedValue(undefined), + readFile: vi.fn(), + writeFile: vi.fn().mockResolvedValue(undefined), })) // Mock fs (createWriteStream) @@ -144,6 +146,8 @@ describe("semble-downloader", () => { // fs.access resolves => file exists ;(fs.access as any).mockResolvedValue(undefined) + // Version file matches current version + ;(fs.readFile as any).mockResolvedValue("v0.3.1") try { const result = await downloadSemble("/storage") @@ -168,6 +172,8 @@ describe("semble-downloader", () => { // fs.access rejects => file not present ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + // No version file exists + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) // Simulate successful download: pipe is called, then "finish" fires mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { @@ -196,6 +202,12 @@ describe("semble-downloader", () => { expect.any(Object), ) expect(fs.chmod).toHaveBeenCalledWith(path.join("/storage", "semble", "semble"), 0o755) + // Version file should be written + expect(fs.writeFile).toHaveBeenCalledWith( + path.join("/storage", "semble", ".semble-version"), + "v0.3.1", + "utf-8", + ) // Archive should be cleaned up expect(fs.unlink).toHaveBeenCalledWith(path.join("/storage", "semble-linux-x64-fast.tar.gz")) } finally { @@ -213,6 +225,8 @@ describe("semble-downloader", () => { // fs.access resolves => file exists ;(fs.access as any).mockResolvedValue(undefined) + // Version file matches + ;(fs.readFile as any).mockResolvedValue("v0.3.1") try { const 
result = await downloadSemble("/storage") @@ -234,6 +248,8 @@ describe("semble-downloader", () => { // fs.access rejects => file not present ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + // No version file + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) // Simulate HTTP error response mockResponse.statusCode = 404 @@ -257,6 +273,8 @@ describe("semble-downloader", () => { // fs.access rejects => file not present ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + // No version file + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) // First call returns a redirect, second call returns 200 let callCount = 0 @@ -378,6 +396,8 @@ describe("semble-downloader", () => { // fs.access rejects => file not present, triggering download ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + // No version file + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) // Simulate successful download mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { @@ -415,6 +435,8 @@ describe("semble-downloader", () => { // fs.access rejects => file not present ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + // No version file + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) // Simulate successful download mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { @@ -436,4 +458,152 @@ describe("semble-downloader", () => { } }) }) + + describe("downloadSemble - version tracking", () => { + it("should re-download when installed version differs from SEMBLE_VERSION", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // Version file has an old version + ;(fs.readFile as 
any).mockResolvedValue("v0.2.0") + // Binary doesn't matter — version mismatch forces re-download + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + // Should remove old installation + expect(fs.rm).toHaveBeenCalledWith(path.join("/storage", "semble"), { + recursive: true, + force: true, + }) + // Should download the new version + expect(https.get).toHaveBeenCalledWith(expect.stringContaining("v0.3.1"), expect.any(Function)) + // Should write the new version file + expect(fs.writeFile).toHaveBeenCalledWith( + path.join("/storage", "semble", ".semble-version"), + "v0.3.1", + "utf-8", + ) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should skip download when installed version matches SEMBLE_VERSION and binary exists", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // Version matches + ;(fs.readFile as any).mockResolvedValue("v0.3.1") + // Binary exists + ;(fs.access as any).mockResolvedValue(undefined) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + // Should NOT download + expect(https.get).not.toHaveBeenCalled() + // Should NOT remove the extract dir + expect(fs.rm).not.toHaveBeenCalled() + } finally { + if (originalPlatform) Object.defineProperty(process, 
"platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should re-download when version matches but binary is missing", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // Version matches + ;(fs.readFile as any).mockResolvedValue("v0.3.1") + // But binary is missing + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + // Should download since binary was missing + expect(https.get).toHaveBeenCalled() + // Should write version file again + expect(fs.writeFile).toHaveBeenCalledWith( + path.join("/storage", "semble", ".semble-version"), + "v0.3.1", + "utf-8", + ) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should download when no version file exists (first install)", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // No version file + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) + // No binary + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful 
download + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + expect(https.get).toHaveBeenCalled() + // Should NOT try to rm the old dir (no previous version) + expect(fs.rm).not.toHaveBeenCalledWith( + path.join("/storage", "semble"), + expect.objectContaining({ recursive: true }), + ) + // Should write version file + expect(fs.writeFile).toHaveBeenCalledWith( + path.join("/storage", "semble", ".semble-version"), + "v0.3.1", + "utf-8", + ) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + }) }) diff --git a/src/services/code-index/semble/semble-cli.ts b/src/services/code-index/semble/semble-cli.ts index 1072748b96..d3690a4c75 100644 --- a/src/services/code-index/semble/semble-cli.ts +++ b/src/services/code-index/semble/semble-cli.ts @@ -104,9 +104,8 @@ export class SembleCLI { const child = spawn(this.semblePath, args, { shell: false, timeout: options.timeout, - maxBuffer: 10 * 1024 * 1024, stdio: ["ignore", "pipe", "pipe"], - } as any) + }) let stdout = "" let stderr = "" diff --git a/src/services/code-index/semble/semble-downloader.ts b/src/services/code-index/semble/semble-downloader.ts index bd064e04a8..4e53f6af7a 100644 --- a/src/services/code-index/semble/semble-downloader.ts +++ b/src/services/code-index/semble/semble-downloader.ts @@ -20,6 +20,7 @@ const SEMBLE_ARCHIVES: Record = { const SEMBLE_VERSION = "v0.3.1" const DOWNLOAD_BASE_URL = `https://github.com/navedmerchant/sembleexec/releases/download/${SEMBLE_VERSION}` +const VERSION_FILE = ".semble-version" /** * Returns whether the current platform/arch has a prebuilt semble binary available. 
@@ -46,9 +47,35 @@ function getArchiveInfo(platform?: string, arch?: string): { archive: string; bi return SEMBLE_ARCHIVES[`${p}-${a}`] } +/** + * Reads the locally installed version from the version metadata file. + * Returns undefined if no version file exists (first install or legacy). + */ +async function getInstalledVersion(storageDir: string): Promise { + try { + const versionPath = path.join(storageDir, "semble", VERSION_FILE) + const version = (await fs.readFile(versionPath, "utf-8")).trim() + return version || undefined + } catch { + return undefined + } +} + +/** + * Writes the version metadata file after a successful download. + */ +async function writeInstalledVersion(storageDir: string, version: string): Promise { + const versionPath = path.join(storageDir, "semble", VERSION_FILE) + await fs.writeFile(versionPath, version, "utf-8") +} + /** * Downloads and extracts the semble archive for the current platform. * + * Compares the hardcoded SEMBLE_VERSION against the version stored on disk. + * If they differ (i.e. the version was bumped in source), it re-downloads. + * Otherwise it returns the existing binary path. + * * The archive is extracted into `storageDir/semble/` and the binary path * is `storageDir/semble/`. 
* @@ -67,21 +94,35 @@ export async function downloadSemble(storageDir: string): Promise Date: Sun, 31 May 2026 13:11:33 -0700 Subject: [PATCH 08/11] address coderabbit comments --- .../__tests__/semble-downloader.spec.ts | 63 ++++++++++++++----- .../code-index/semble/semble-downloader.ts | 54 +++++++++++++++- 2 files changed, 100 insertions(+), 17 deletions(-) diff --git a/src/services/code-index/semble/__tests__/semble-downloader.spec.ts b/src/services/code-index/semble/__tests__/semble-downloader.spec.ts index 575277f850..23add66c53 100644 --- a/src/services/code-index/semble/__tests__/semble-downloader.spec.ts +++ b/src/services/code-index/semble/__tests__/semble-downloader.spec.ts @@ -3,6 +3,22 @@ import * as fs from "fs/promises" import * as path from "path" import { EventEmitter } from "events" +// Mock crypto — verifyChecksum reads the archive file (mocked via createReadStream) +// and computes a SHA-256. We make digest() dynamically return the expected checksum +// for the current process.platform/arch so verification always passes in unit tests. +const CHECKSUMS: Record = { + "linux-x64": "2bd4117dbd1ff7a26ed5ef44dad8d43162a4b9f431ec0bcc9dd2f9c6f5952e28", + "linux-arm64": "177d14f41d3272594844a2635d59d97ad20400868a874a59169fd26a868c32a5", + "darwin-arm64": "9130f447ff2c21803853a9aee58268f0e05134326384ac23d8b74ed22905e118", + "win32-x64": "c8ae86f3703675e356824e08cf79c8a20c41c602296d2a5bff15bf35d762a46b", +} +vi.mock("crypto", () => ({ + createHash: vi.fn(() => ({ + update: vi.fn().mockReturnThis(), + digest: vi.fn(() => CHECKSUMS[`${process.platform}-${process.arch}`] ?? 
"no-match"), + })), +})) + // Mock fs/promises vi.mock("fs/promises", () => ({ mkdir: vi.fn().mockResolvedValue(undefined), @@ -14,27 +30,37 @@ vi.mock("fs/promises", () => ({ writeFile: vi.fn().mockResolvedValue(undefined), })) -// Mock fs (createWriteStream) +// Mock fs (createWriteStream and createReadStream for checksum verification) const mockWriteStream = { on: vi.fn(), close: vi.fn(), } vi.mock("fs", () => ({ createWriteStream: vi.fn(() => mockWriteStream), + createReadStream: vi.fn(() => { + const { EventEmitter } = require("events") + const stream = new EventEmitter() + setImmediate(() => { + stream.emit("data", Buffer.from("fake-archive-content")) + stream.emit("end") + }) + return stream + }), })) -// Mock https -const mockRequest = new EventEmitter() as any -mockRequest.setTimeout = vi.fn() - -const mockResponse = new EventEmitter() as any -mockResponse.statusCode = 200 -mockResponse.headers = {} -mockResponse.pipe = vi.fn() -mockResponse.destroy = vi.fn() +// Mock https — fresh emitters per invocation to avoid listener leaks across tests +let mockRequest: any +let mockResponse: any vi.mock("https", () => ({ get: vi.fn((_url: string, callback: (res: any) => void) => { + mockRequest = Object.assign(new EventEmitter(), { setTimeout: vi.fn() }) + mockResponse = Object.assign(new EventEmitter(), { + statusCode: 200, + headers: {}, + pipe: vi.fn(), + destroy: vi.fn(), + }) setImmediate(() => callback(mockResponse)) return mockRequest }), @@ -64,11 +90,6 @@ import { spawn } from "child_process" describe("semble-downloader", () => { beforeEach(() => { vi.clearAllMocks() - // Reset response defaults - mockResponse.statusCode = 200 - mockResponse.headers = {} - mockResponse.pipe = vi.fn() - mockResponse.destroy = vi.fn() mockWriteStream.on = vi.fn() mockWriteStream.close = vi.fn() }) @@ -252,7 +273,17 @@ describe("semble-downloader", () => { ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) // Simulate HTTP error response - mockResponse.statusCode = 
404 + ;(https.get as any).mockImplementation((_url: string, callback: (res: any) => void) => { + const res = Object.assign(new EventEmitter(), { + statusCode: 404, + headers: {}, + pipe: vi.fn(), + destroy: vi.fn(), + }) + setImmediate(() => callback(res)) + const req = Object.assign(new EventEmitter(), { setTimeout: vi.fn() }) + return req + }) try { await expect(downloadSemble("/storage")).rejects.toThrow("Failed to download semble") diff --git a/src/services/code-index/semble/semble-downloader.ts b/src/services/code-index/semble/semble-downloader.ts index 4e53f6af7a..74a4034856 100644 --- a/src/services/code-index/semble/semble-downloader.ts +++ b/src/services/code-index/semble/semble-downloader.ts @@ -2,6 +2,8 @@ import * as fs from "fs/promises" import * as path from "path" import * as https from "https" import { createWriteStream } from "fs" +import { createHash } from "crypto" +import { createReadStream } from "fs" import { spawn } from "child_process" /** @@ -22,6 +24,40 @@ const SEMBLE_VERSION = "v0.3.1" const DOWNLOAD_BASE_URL = `https://github.com/navedmerchant/sembleexec/releases/download/${SEMBLE_VERSION}` const VERSION_FILE = ".semble-version" +/** + * SHA-256 checksums for each platform archive at SEMBLE_VERSION. + * These are verified after download to guard against tampered release assets. + * Update these when bumping SEMBLE_VERSION. + * + * To regenerate: `shasum -a 256 ` + */ +const SEMBLE_SHA256: Record = { + "linux-x64": "2bd4117dbd1ff7a26ed5ef44dad8d43162a4b9f431ec0bcc9dd2f9c6f5952e28", + "linux-arm64": "177d14f41d3272594844a2635d59d97ad20400868a874a59169fd26a868c32a5", + "darwin-arm64": "9130f447ff2c21803853a9aee58268f0e05134326384ac23d8b74ed22905e118", + "win32-x64": "c8ae86f3703675e356824e08cf79c8a20c41c602296d2a5bff15bf35d762a46b", +} + +/** + * Verifies the SHA-256 checksum of a downloaded file against the expected value. + * Throws if the checksum does not match. 
+ */ +export async function verifyChecksum(filePath: string, expected: string): Promise { + const hash = createHash("sha256") + await new Promise((resolve, reject) => { + const stream = createReadStream(filePath) + stream.on("data", (chunk) => hash.update(chunk)) + stream.on("end", resolve) + stream.on("error", reject) + }) + const actual = hash.digest("hex") + if (actual !== expected) { + throw new Error( + `Checksum mismatch for ${path.basename(filePath)}: expected ${expected.slice(0, 12)}…, got ${actual.slice(0, 12)}…`, + ) + } +} + /** * Returns whether the current platform/arch has a prebuilt semble binary available. */ @@ -127,6 +163,13 @@ export async function downloadSemble(storageDir: string): Promise { }) } +/** + * Escapes a string for use inside a PowerShell single-quoted literal. + * In PowerShell, the only special character in a single-quoted string is the + * apostrophe itself, which is escaped by doubling it. + */ +function escapePowerShellLiteral(value: string): string { + return value.replace(/'/g, "''") +} + /** * Extracts a .zip archive into the destination directory. * Uses PowerShell on Windows, unzip on other platforms. 
@@ -229,7 +281,7 @@ function extractZip(archivePath: string, destDir: string): Promise { [ "-NoProfile", "-Command", - `Expand-Archive -Path '${archivePath}' -DestinationPath '${destDir}' -Force`, + `Expand-Archive -Path '${escapePowerShellLiteral(archivePath)}' -DestinationPath '${escapePowerShellLiteral(destDir)}' -Force`, ], { shell: false, stdio: ["ignore", "pipe", "pipe"] }, ) From 62845a158d6319e1829b51cfbb7a11ed7d7a690f Mon Sep 17 00:00:00 2001 From: Naved Date: Sun, 31 May 2026 17:16:12 -0700 Subject: [PATCH 09/11] Update semble download url to zoo code org --- src/services/code-index/semble/semble-downloader.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/services/code-index/semble/semble-downloader.ts b/src/services/code-index/semble/semble-downloader.ts index 74a4034856..47b67f9cd4 100644 --- a/src/services/code-index/semble/semble-downloader.ts +++ b/src/services/code-index/semble/semble-downloader.ts @@ -8,7 +8,7 @@ import { spawn } from "child_process" /** * Supported platform/arch combinations for the semble standalone executable. - * Maps to archive names at https://github.com/navedmerchant/sembleexec/releases + * Maps to archive names at https://github.com/Zoo-Code-Org/sembleexec/releases * * Uses "fast-start" archives (one-dir builds) for ~20x faster startup * compared to single-file binaries. 
@@ -21,7 +21,7 @@ const SEMBLE_ARCHIVES: Record = { } const SEMBLE_VERSION = "v0.3.1" -const DOWNLOAD_BASE_URL = `https://github.com/navedmerchant/sembleexec/releases/download/${SEMBLE_VERSION}` +const DOWNLOAD_BASE_URL = `https://github.com/Zoo-Code-Org/sembleexec/releases/download/${SEMBLE_VERSION}` const VERSION_FILE = ".semble-version" /** From e89902dc297a8d23399abcbff70544447b62e5b1 Mon Sep 17 00:00:00 2001 From: Naved Date: Sun, 31 May 2026 17:56:17 -0700 Subject: [PATCH 10/11] Fix semble creating multiple caches --- .../semble/__tests__/provider.spec.ts | 103 +++++++++++++++--- src/services/code-index/semble/provider.ts | 39 +++++-- 2 files changed, 114 insertions(+), 28 deletions(-) diff --git a/src/services/code-index/semble/__tests__/provider.spec.ts b/src/services/code-index/semble/__tests__/provider.spec.ts index e72ebdd665..8b3fb83b0f 100644 --- a/src/services/code-index/semble/__tests__/provider.spec.ts +++ b/src/services/code-index/semble/__tests__/provider.spec.ts @@ -292,38 +292,108 @@ describe("SembleProvider", () => { expect(results[0].payload?.filePath).toBe("/workspace/src/good.ts") }) - it("should use directoryPrefix when provided", async () => { + it("should always search workspace root regardless of directoryPrefix", async () => { mockCli.search.mockResolvedValue([]) await provider.searchIndex("test", "/custom/path") - expect(mockCli.search).toHaveBeenCalledWith("test", "/custom/path", { + // Should always pass workspace root to semble, not the directoryPrefix + expect(mockCli.search).toHaveBeenCalledWith("test", "/workspace", { topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, }) }) - it("should resolve relative directoryPrefix against workspace path", async () => { + it("should always search workspace root with relative directoryPrefix", async () => { mockCli.search.mockResolvedValue([]) await provider.searchIndex("test", "src/subdir") - const expectedPath = path.join("/workspace", "src/subdir") - 
expect(mockCli.search).toHaveBeenCalledWith("test", expectedPath, { + // Should always pass workspace root to semble + expect(mockCli.search).toHaveBeenCalledWith("test", "/workspace", { topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, }) }) - it("should use absolute directoryPrefix as-is", async () => { - mockCli.search.mockResolvedValue([]) + it("should filter results by directoryPrefix when provided", async () => { + const mockResults = [ + { + chunk: { + content: "code in src/auth", + file_path: "src/auth/login.ts", + start_line: 1, + end_line: 10, + language: "typescript", + location: "src/auth/login.ts:1-10", + }, + score: 0.95, + }, + { + chunk: { + content: "code in src/utils", + file_path: "src/utils/helper.ts", + start_line: 5, + end_line: 15, + language: "typescript", + location: "src/utils/helper.ts:5-15", + }, + score: 0.8, + }, + { + chunk: { + content: "code in root", + file_path: "README.md", + start_line: 1, + end_line: 5, + language: "markdown", + location: "README.md:1-5", + }, + score: 0.6, + }, + ] - await provider.searchIndex("test", "/absolute/custom/path") + mockCli.search.mockResolvedValue(mockResults) - expect(mockCli.search).toHaveBeenCalledWith("test", "/absolute/custom/path", { - topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, - content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, - }) + const results = await provider.searchIndex("test", "src/auth") + + // Only the src/auth result should pass the filter + expect(results).toHaveLength(1) + expect(results[0].payload?.filePath).toBe("/workspace/src/auth/login.ts") + }) + + it("should not filter results when no directoryPrefix is provided", async () => { + const mockResults = [ + { + chunk: { + content: "code in src/auth", + file_path: "src/auth/login.ts", + start_line: 1, + end_line: 10, + language: "typescript", + location: "src/auth/login.ts:1-10", + }, + score: 0.95, + }, + { + chunk: { + content: "code in src/utils", + file_path: "src/utils/helper.ts", + start_line: 5, + 
end_line: 15, + language: "typescript", + location: "src/utils/helper.ts:5-15", + }, + score: 0.8, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + // All results should be returned + expect(results).toHaveLength(2) }) it("should return empty array on search error and log telemetry", async () => { @@ -458,12 +528,12 @@ describe("SembleProvider", () => { expect(results[0].payload?.filePath).toContain("/") }) - it("should join file paths against the searchPath when directoryPrefix is provided", async () => { + it("should always join file paths against workspace root, even with directoryPrefix", async () => { const mockResults = [ { chunk: { content: "code", - file_path: "file.ts", + file_path: "src/file.ts", start_line: 1, end_line: 5, language: "typescript", @@ -475,9 +545,10 @@ describe("SembleProvider", () => { mockCli.search.mockResolvedValue(mockResults) - const results = await provider.searchIndex("test", "/custom/path") + // Even with a directoryPrefix, file paths are joined against workspace root + const results = await provider.searchIndex("test", "src") - expect(results[0].payload?.filePath).toBe("/custom/path/file.ts") + expect(results[0].payload?.filePath).toBe("/workspace/src/file.ts") }) it("should assign sequential semble-N IDs to results", async () => { diff --git a/src/services/code-index/semble/provider.ts b/src/services/code-index/semble/provider.ts index 6521904746..8f9457a77a 100644 --- a/src/services/code-index/semble/provider.ts +++ b/src/services/code-index/semble/provider.ts @@ -133,6 +133,11 @@ export class SembleProvider implements ISembleProvider { /** * Searches the codebase using `semble search`. + * + * Always searches the full workspace root to avoid creating separate + * Semble cache directories for each subdirectory. When directoryPrefix + * is provided, results are filtered post-search to only include files + * within that directory. 
*/ async searchIndex(query: string, directoryPrefix?: string): Promise { if (!this._isInitialized) { @@ -145,22 +150,32 @@ export class SembleProvider implements ISembleProvider { } try { - let searchPath = this.workspacePath - if (directoryPrefix) { - // Resolve relative paths against the workspace root - searchPath = path.isAbsolute(directoryPrefix) - ? directoryPrefix - : path.join(this.workspacePath, directoryPrefix) - } - console.log(`[SembleProvider] Searching for "${query}" in ${searchPath}`) - const results = await this.cli.search(query, searchPath, { + // Always search the full workspace to maintain a single Semble cache. + // Semble creates a separate cache directory per path (SHA-256 of the + // resolved absolute path), so passing subdirectories would create + // redundant indexes and waste disk space. + console.log(`[SembleProvider] Searching for "${query}" in ${this.workspacePath}`) + const results = await this.cli.search(query, this.workspacePath, { topK: this.config.topK, content: this.config.content, }) - // Semble returns file paths relative to the search path. - // We join against searchPath (not workspacePath) to get correct absolute paths. - const converted = this._convertResults(results, searchPath) + // Semble returns file paths relative to the search path (workspace root). + // We join against workspacePath to produce correct absolute paths. + let converted = this._convertResults(results, this.workspacePath) + + // Filter results to the requested directory prefix, if any. + if (directoryPrefix) { + const normalizedPrefix = path.resolve(this.workspacePath, directoryPrefix).replace(/\\/g, "/") + converted = converted.filter((r) => { + const filePath = (r.payload?.filePath ?? 
"").replace(/\\/g, "/") + return filePath.startsWith(normalizedPrefix + "/") || filePath === normalizedPrefix + }) + console.log( + `[SembleProvider] Filtered to "${directoryPrefix}": ${converted.length} of ${results.length} results`, + ) + } + console.log( `[SembleProvider] Search returned ${converted.length} results (raw: ${results.length}). Sample path: ${converted[0]?.payload?.filePath ?? "none"}`, ) From 4281b30eaf1ffd055f6a91b888eb8f6da341ce7a Mon Sep 17 00:00:00 2001 From: Naved Date: Sun, 31 May 2026 23:10:41 -0700 Subject: [PATCH 11/11] fix unit test --- src/services/code-index/semble/provider.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/code-index/semble/provider.ts b/src/services/code-index/semble/provider.ts index 8f9457a77a..a429715954 100644 --- a/src/services/code-index/semble/provider.ts +++ b/src/services/code-index/semble/provider.ts @@ -166,7 +166,7 @@ export class SembleProvider implements ISembleProvider { // Filter results to the requested directory prefix, if any. if (directoryPrefix) { - const normalizedPrefix = path.resolve(this.workspacePath, directoryPrefix).replace(/\\/g, "/") + const normalizedPrefix = path.join(this.workspacePath, directoryPrefix).replace(/\\/g, "/") converted = converted.filter((r) => { const filePath = (r.payload?.filePath ?? "").replace(/\\/g, "/") return filePath.startsWith(normalizedPrefix + "/") || filePath === normalizedPrefix