diff --git a/packages/types/src/codebase-index.ts b/packages/types/src/codebase-index.ts index 61009ba301..0ce995a402 100644 --- a/packages/types/src/codebase-index.ts +++ b/packages/types/src/codebase-index.ts @@ -31,6 +31,7 @@ export const codebaseIndexConfigSchema = z.object({ "vercel-ai-gateway", "bedrock", "openrouter", + "semble", ]) .optional(), codebaseIndexEmbedderBaseUrl: z.string().optional(), @@ -67,6 +68,7 @@ export const codebaseIndexModelsSchema = z.object({ "vercel-ai-gateway": z.record(z.string(), z.object({ dimension: z.number() })).optional(), openrouter: z.record(z.string(), z.object({ dimension: z.number() })).optional(), bedrock: z.record(z.string(), z.object({ dimension: z.number() })).optional(), + semble: z.record(z.string(), z.object({ dimension: z.number() })).optional(), }) export type CodebaseIndexModels = z.infer diff --git a/packages/types/src/embedding.ts b/packages/types/src/embedding.ts index 1c5a92e1ac..f0597d96c6 100644 --- a/packages/types/src/embedding.ts +++ b/packages/types/src/embedding.ts @@ -6,7 +6,8 @@ export type EmbedderProvider = | "mistral" | "vercel-ai-gateway" | "bedrock" - | "openrouter" // Add other providers as needed. + | "openrouter" + | "semble" // Local hybrid search via semble CLI — no API keys or Qdrant required. export interface EmbeddingModelProfile { dimension: number diff --git a/packages/types/src/vscode-extension-host.ts b/packages/types/src/vscode-extension-host.ts index 5853e536de..be5130e9f2 100644 --- a/packages/types/src/vscode-extension-host.ts +++ b/packages/types/src/vscode-extension-host.ts @@ -375,6 +375,12 @@ export type ExtensionState = Pick< deviceName?: string debug?: boolean + /** + * Platform info for conditional feature support (e.g. semble binary availability). + */ + platform?: string + arch?: string + /** * Monotonically increasing sequence number for clineMessages state pushes. 
* When present, the frontend should only apply clineMessages from a state push @@ -660,6 +666,7 @@ export interface WebviewMessage { | "vercel-ai-gateway" | "bedrock" | "openrouter" + | "semble" codebaseIndexEmbedderBaseUrl?: string codebaseIndexEmbedderModelId: string codebaseIndexEmbedderModelDimension?: number // Generic dimension for all providers diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index d5d34d0e91..d1336025a2 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -2283,6 +2283,8 @@ export class ClineProvider } })(), ...zooCodeState, + platform: process.platform, + arch: process.arch, debug: vscode.workspace.getConfiguration(Package.name).get("debug", false), } } diff --git a/src/services/code-index/__tests__/config-manager.spec.ts b/src/services/code-index/__tests__/config-manager.spec.ts index 27815c0bef..1839eb464f 100644 --- a/src/services/code-index/__tests__/config-manager.spec.ts +++ b/src/services/code-index/__tests__/config-manager.spec.ts @@ -1127,6 +1127,86 @@ describe("CodeIndexConfigManager", () => { expect(requiresRestart).toBe(true) }) }) + + describe("semble provider configuration", () => { + it("should load semble provider configuration", async () => { + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + const result = await configManager.loadConfiguration() + + expect(result.currentConfig.embedderProvider).toBe("semble") + expect(result.currentConfig.isConfigured).toBe(true) + }) + + it("should require restart when switching from openai to semble", async () => { + // Initial state with OpenAI + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderModelId: "text-embedding-3-small", + }) + 
setupSecretMocks({ + codeIndexOpenAiKey: "test-key", + }) + + await configManager.loadConfiguration() + + // Switch to semble + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + const result = await configManager.loadConfiguration() + expect(result.requiresRestart).toBe(true) + }) + + it("should require restart when switching from semble to openai", async () => { + // Initial state with semble + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + await configManager.loadConfiguration() + + // Switch to openai + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexQdrantUrl: "http://qdrant.local", + codebaseIndexEmbedderProvider: "openai", + codebaseIndexEmbedderModelId: "text-embedding-3-small", + }) + setupSecretMocks({ + codeIndexOpenAiKey: "test-key", + }) + + const result = await configManager.loadConfiguration() + expect(result.requiresRestart).toBe(true) + }) + + it("should not require restart when semble config stays the same", async () => { + // Initial state with semble + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + await configManager.loadConfiguration() + + // Same semble config again + const result = await configManager.loadConfiguration() + expect(result.requiresRestart).toBe(false) + }) + }) }) describe("isConfigured", () => { @@ -1684,6 +1764,30 @@ describe("CodeIndexConfigManager", () => { expect(configManager.isConfigured()).toBe(false) }) + it("should always return true for semble provider (no API keys or Qdrant needed)", () => { + mockContextProxy.getGlobalState.mockReturnValue({ + 
codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + configManager = new CodeIndexConfigManager(mockContextProxy) + expect(configManager.isConfigured()).toBe(true) + }) + + it("should return true for semble even without any other configuration", () => { + mockContextProxy.getGlobalState.mockReturnValue({ + codebaseIndexEnabled: true, + codebaseIndexEmbedderProvider: "semble", + // No qdrant URL, no API keys + }) + mockContextProxy.getSecret.mockReturnValue(undefined) + + configManager = new CodeIndexConfigManager(mockContextProxy) + expect(configManager.isConfigured()).toBe(true) + expect(configManager.isFeatureConfigured).toBe(true) + }) + describe("currentModelDimension", () => { beforeEach(() => { vi.clearAllMocks() diff --git a/src/services/code-index/__tests__/service-factory.spec.ts b/src/services/code-index/__tests__/service-factory.spec.ts index 3e943ebd82..b24d5df2fc 100644 --- a/src/services/code-index/__tests__/service-factory.spec.ts +++ b/src/services/code-index/__tests__/service-factory.spec.ts @@ -356,6 +356,17 @@ describe("CodeIndexServiceFactory", () => { // Act & Assert expect(() => factory.createEmbedder()).toThrow("serviceFactory.invalidEmbedderType") }) + + it("should throw when provider is semble (semble handles its own embedding)", () => { + const testConfig = { + embedderProvider: "semble", + } + mockConfigManager.getConfig.mockReturnValue(testConfig as any) + + expect(() => factory.createEmbedder()).toThrow( + "Semble provider handles its own embedding. 
Do not call createEmbedder() for semble", + ) + }) }) describe("createVectorStore", () => { @@ -678,6 +689,17 @@ describe("CodeIndexServiceFactory", () => { // Act & Assert expect(() => factory.createVectorStore()).toThrow("serviceFactory.qdrantUrlMissing") }) + + it("should throw when provider is semble (semble handles its own vector storage)", () => { + const testConfig = { + embedderProvider: "semble", + } + mockConfigManager.getConfig.mockReturnValue(testConfig as any) + + expect(() => factory.createVectorStore()).toThrow( + "Semble provider handles its own vector storage. Do not call createVectorStore() for semble", + ) + }) }) describe("validateEmbedder", () => { diff --git a/src/services/code-index/config-manager.ts b/src/services/code-index/config-manager.ts index e7f239e621..abac552561 100644 --- a/src/services/code-index/config-manager.ts +++ b/src/services/code-index/config-manager.ts @@ -120,6 +120,8 @@ export class CodeIndexConfigManager { this.embedderProvider = "bedrock" } else if (codebaseIndexEmbedderProvider === "openrouter") { this.embedderProvider = "openrouter" + } else if (codebaseIndexEmbedderProvider === "semble") { + this.embedderProvider = "semble" } else { this.embedderProvider = "openai" } @@ -231,6 +233,11 @@ export class CodeIndexConfigManager { * Checks if the service is properly configured based on the embedder type. 
*/ public isConfigured(): boolean { + if (this.embedderProvider === "semble") { + // Semble requires no API keys or Qdrant — it's always configured + return true + } + if (this.embedderProvider === "openai") { const openAiKey = this.openAiOptions?.openAiNativeApiKey const qdrantUrl = this.qdrantUrl diff --git a/src/services/code-index/interfaces/manager.ts b/src/services/code-index/interfaces/manager.ts index d657ad667c..cdda7a7053 100644 --- a/src/services/code-index/interfaces/manager.ts +++ b/src/services/code-index/interfaces/manager.ts @@ -84,6 +84,7 @@ export type EmbedderProvider = | "vercel-ai-gateway" | "bedrock" | "openrouter" + | "semble" export interface IndexProgressUpdate { systemStatus: IndexingState diff --git a/src/services/code-index/manager.ts b/src/services/code-index/manager.ts index 91ea515e40..245e8678f5 100644 --- a/src/services/code-index/manager.ts +++ b/src/services/code-index/manager.ts @@ -8,6 +8,7 @@ import { CodeIndexServiceFactory } from "./service-factory" import { CodeIndexSearchService } from "./search-service" import { CodeIndexOrchestrator } from "./orchestrator" import { CacheManager } from "./cache-manager" +import { SembleProvider } from "./semble" import { RooIgnoreController } from "../../core/ignore/RooIgnoreController" import fs from "fs/promises" import ignore from "ignore" @@ -27,6 +28,7 @@ export class CodeIndexManager { private _orchestrator: CodeIndexOrchestrator | undefined private _searchService: CodeIndexSearchService | undefined private _cacheManager: CacheManager | undefined + private _sembleProvider: SembleProvider | undefined // Flag to prevent race conditions during error recovery private _isRecoveringFromError = false @@ -125,6 +127,10 @@ export class CodeIndexManager { } private assertInitialized() { + if (this._sembleProvider) { + // When semble is active, we don't need orchestrator/searchService + return + } if (!this._configManager || !this._orchestrator || !this._searchService || !this._cacheManager) { 
throw new Error("CodeIndexManager not initialized. Call initialize() first.") } @@ -134,6 +140,9 @@ export class CodeIndexManager { if (!this.isFeatureEnabled) { return "Standby" } + if (this._sembleProvider) { + return this._sembleProvider.state + } this.assertInitialized() return this._orchestrator!.state } @@ -173,6 +182,9 @@ export class CodeIndexManager { if (this._orchestrator) { this._orchestrator.stopWatcher() } + if (this._sembleProvider) { + this._sembleProvider.stopIndexing() + } return { requiresRestart } } @@ -196,19 +208,27 @@ export class CodeIndexManager { } // 6. Determine if Core Services Need Recreation - const needsServiceRecreation = !this._serviceFactory || requiresRestart + const needsServiceRecreation = (!this._serviceFactory && !this._sembleProvider) || requiresRestart if (needsServiceRecreation) { await this._recreateServices() } // 7. Handle Indexing Start/Restart - const shouldStartOrRestartIndexing = - requiresRestart || - (needsServiceRecreation && (!this._orchestrator || this._orchestrator.state !== "Indexing")) + if (this._sembleProvider) { + // For semble, start indexing if needed + const shouldStartIndexing = requiresRestart || needsServiceRecreation + if (shouldStartIndexing) { + await this._sembleProvider.startIndexing() + } + } else { + const shouldStartOrRestartIndexing = + requiresRestart || + (needsServiceRecreation && (!this._orchestrator || this._orchestrator.state !== "Indexing")) - if (shouldStartOrRestartIndexing) { - this._orchestrator?.startIndexing() + if (shouldStartOrRestartIndexing) { + this._orchestrator?.startIndexing() + } } return { requiresRestart } @@ -226,6 +246,12 @@ export class CodeIndexManager { return } + // Delegate to semble provider if active + if (this._sembleProvider) { + await this._sembleProvider.startIndexing() + return + } + // Check if we're in error state and recover if needed const currentStatus = this.getCurrentStatus() if (currentStatus.systemStatus === "Error") { @@ -244,6 +270,10 @@ 
export class CodeIndexManager { * Stops any in-progress indexing operation and the file watcher. */ public stopIndexing(): void { + if (this._sembleProvider) { + this._sembleProvider.stopIndexing() + return + } if (this._orchestrator) { this._orchestrator.stopIndexing() } @@ -295,6 +325,7 @@ export class CodeIndexManager { this._serviceFactory = undefined this._orchestrator = undefined this._searchService = undefined + this._sembleProvider = undefined // Reset the flag after recovery is complete this._isRecoveringFromError = false @@ -306,6 +337,10 @@ export class CodeIndexManager { */ public dispose(): void { this.stopIndexing() + if (this._sembleProvider) { + this._sembleProvider.dispose() + this._sembleProvider = undefined + } this._stateManager.dispose() } @@ -317,6 +352,10 @@ export class CodeIndexManager { if (!this.isFeatureEnabled) { return } + if (this._sembleProvider) { + await this._sembleProvider.clearIndexData() + return + } this.assertInitialized() await this._orchestrator!.clearIndexData() await this._cacheManager!.clearCacheFile() @@ -338,6 +377,9 @@ export class CodeIndexManager { if (!this.isFeatureEnabled) { return [] } + if (this._sembleProvider) { + return this._sembleProvider.searchIndex(query, directoryPrefix) + } this.assertInitialized() return this._searchService!.searchIndex(query, directoryPrefix) } @@ -351,11 +393,23 @@ export class CodeIndexManager { if (this._orchestrator) { this.stopWatcher() } + // Dispose existing semble provider if switching away + if (this._sembleProvider) { + this._sembleProvider.dispose() + this._sembleProvider = undefined + } // Clear existing services to ensure clean state this._orchestrator = undefined this._searchService = undefined - // (Re)Initialize service factory + // Branch: if provider is "semble", create SembleProvider instead of external services + if (this._configManager!.currentEmbedderProvider === "semble") { + this._sembleProvider = new SembleProvider(this.workspacePath, this.context, 
this._stateManager) + await this._sembleProvider.initialize() + return + } + + // (Re)Initialize service factory for external providers this._serviceFactory = new CodeIndexServiceFactory( this._configManager!, this.workspacePath, diff --git a/src/services/code-index/semble/__tests__/provider.spec.ts b/src/services/code-index/semble/__tests__/provider.spec.ts new file mode 100644 index 0000000000..8b3fb83b0f --- /dev/null +++ b/src/services/code-index/semble/__tests__/provider.spec.ts @@ -0,0 +1,665 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import * as path from "path" +import { SembleProvider } from "../provider" +import { SembleCLI } from "../semble-cli" +import { SEMBLE_DEFAULTS } from "../types" + +// Mock SembleCLI - use a shared mock instance +const sharedMockCli = { + checkInstalled: vi.fn(), + search: vi.fn(), + findRelated: vi.fn(), +} + +vi.mock("../semble-cli", () => ({ + SembleCLI: vi.fn().mockImplementation(() => sharedMockCli), +})) + +// Mock semble-downloader +vi.mock("../semble-downloader", () => ({ + isSembleSupportedPlatform: vi.fn().mockReturnValue(true), + downloadSemble: vi.fn().mockResolvedValue("/mock/storage/semble/semble"), +})) + +// Mock TelemetryService +vi.mock("@roo-code/telemetry", () => ({ + TelemetryService: { + instance: { + captureEvent: vi.fn(), + }, + }, +})) + +// Mock vscode +vi.mock("vscode", () => ({ + ExtensionContext: vi.fn(), +})) + +import { TelemetryService } from "@roo-code/telemetry" +import { TelemetryEventName } from "@roo-code/types" +import { isSembleSupportedPlatform, downloadSemble } from "../semble-downloader" + +describe("SembleProvider", () => { + let provider: SembleProvider + let mockCli: any + let mockStateManager: any + let mockContext: any + + beforeEach(() => { + vi.clearAllMocks() + ;(isSembleSupportedPlatform as any).mockReturnValue(true) + ;(downloadSemble as any).mockResolvedValue("/mock/storage/semble/semble") + + mockStateManager = { + setSystemState: vi.fn(), + } + + 
mockContext = { + globalStorageUri: { fsPath: "/mock/storage" }, + } + + provider = new SembleProvider("/workspace", mockContext, mockStateManager) + mockCli = sharedMockCli + }) + + describe("constructor", () => { + it("should create provider with default options", () => { + const p = new SembleProvider("/workspace", mockContext, mockStateManager) + expect(p).toBeDefined() + expect(p.state).toBe("Standby") + }) + + it("should create provider with custom topK and content", () => { + const p = new SembleProvider("/workspace", mockContext, mockStateManager, { + topK: 5, + content: "all", + }) + expect(p).toBeDefined() + }) + }) + + describe("initialize", () => { + it("should auto-download and set state to Indexed when semble works", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + + await provider.initialize() + + expect(downloadSemble).toHaveBeenCalledWith("/mock/storage") + expect(provider.state).toBe("Indexed") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Indexed", + "Semble is ready. 
Searches index on-the-fly.", + ) + }) + + it("should set state to Error when platform is unsupported", async () => { + ;(isSembleSupportedPlatform as any).mockReturnValue(false) + + await provider.initialize() + + expect(provider.state).toBe("Error") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Error", + expect.stringContaining("not supported on this platform"), + ) + }) + + it("should set state to Error when download fails", async () => { + ;(downloadSemble as any).mockRejectedValue(new Error("network error")) + + await provider.initialize() + + expect(provider.state).toBe("Error") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Error", + expect.stringContaining("Failed to download semble"), + ) + }) + + it("should set state to Error when semble check fails after download", async () => { + mockCli.checkInstalled.mockResolvedValue({ + installed: false, + error: "binary not functional", + }) + + await provider.initialize() + + expect(provider.state).toBe("Error") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Error", + expect.stringContaining("binary not functional"), + ) + }) + + it("should not re-initialize if already initialized", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + + await provider.initialize() + await provider.initialize() + + expect(mockCli.checkInstalled).toHaveBeenCalledTimes(1) + }) + }) + + describe("startIndexing", () => { + it("should initialize if not already initialized", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + + await provider.startIndexing() + + expect(provider.state).toBe("Indexed") + }) + + it("should not change state if in Error state", async () => { + ;(isSembleSupportedPlatform as any).mockReturnValue(false) + + await provider.initialize() + await provider.startIndexing() + + expect(provider.state).toBe("Error") + }) + + it("should mark as Indexed when already initialized", async () => { + 
mockCli.checkInstalled.mockResolvedValue({ installed: true }) + + await provider.initialize() + await provider.startIndexing() + + expect(provider.state).toBe("Indexed") + }) + }) + + describe("stopIndexing", () => { + it("should be a no-op", () => { + provider.stopIndexing() + // No error thrown, no state change + expect(provider.state).toBe("Standby") + }) + }) + + describe("searchIndex", () => { + beforeEach(async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + await provider.initialize() + }) + + it("should return empty array when not initialized", async () => { + const uninitializedProvider = new SembleProvider("/workspace", mockContext, mockStateManager) + const results = await uninitializedProvider.searchIndex("test query") + expect(results).toEqual([]) + }) + + it("should search using CLI and convert results", async () => { + const mockResults = [ + { + chunk: { + content: "function authenticate() {}", + file_path: "src/auth.ts", + start_line: 10, + end_line: 25, + language: "typescript", + location: "src/auth.ts:10-25", + }, + score: 0.92, + }, + { + chunk: { + content: "export function login() {}", + file_path: "src/login.ts", + start_line: 5, + end_line: 15, + language: "typescript", + location: "src/login.ts:5-15", + }, + score: 0.78, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("authentication") + + expect(mockCli.search).toHaveBeenCalledWith("authentication", "/workspace", { + topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, + content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, + }) + + expect(results).toHaveLength(2) + expect(results[0]).toEqual({ + id: "semble-0", + score: 0.92, + payload: { + filePath: "/workspace/src/auth.ts", + codeChunk: "function authenticate() {}", + startLine: 10, + endLine: 25, + }, + }) + expect(results[1]).toEqual({ + id: "semble-1", + score: 0.78, + payload: { + filePath: "/workspace/src/login.ts", + codeChunk: "export function login() {}", + startLine: 5, + 
endLine: 15, + }, + }) + }) + + it("should filter out results with missing file_path", async () => { + const mockResults = [ + { + chunk: { + content: "good result", + file_path: "src/good.ts", + start_line: 1, + end_line: 10, + language: "typescript", + location: "src/good.ts:1-10", + }, + score: 0.8, + }, + { + chunk: { + content: "no file path result", + file_path: "", + start_line: 1, + end_line: 5, + language: "typescript", + location: "", + }, + score: 0.5, + }, + { + chunk: { + content: "null file path result", + file_path: null, + start_line: 1, + end_line: 5, + language: null, + location: "", + }, + score: 0.3, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + expect(results).toHaveLength(1) + expect(results[0].payload?.filePath).toBe("/workspace/src/good.ts") + }) + + it("should always search workspace root regardless of directoryPrefix", async () => { + mockCli.search.mockResolvedValue([]) + + await provider.searchIndex("test", "/custom/path") + + // Should always pass workspace root to semble, not the directoryPrefix + expect(mockCli.search).toHaveBeenCalledWith("test", "/workspace", { + topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, + content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, + }) + }) + + it("should always search workspace root with relative directoryPrefix", async () => { + mockCli.search.mockResolvedValue([]) + + await provider.searchIndex("test", "src/subdir") + + // Should always pass workspace root to semble + expect(mockCli.search).toHaveBeenCalledWith("test", "/workspace", { + topK: SEMBLE_DEFAULTS.DEFAULT_TOP_K, + content: SEMBLE_DEFAULTS.DEFAULT_CONTENT, + }) + }) + + it("should filter results by directoryPrefix when provided", async () => { + const mockResults = [ + { + chunk: { + content: "code in src/auth", + file_path: "src/auth/login.ts", + start_line: 1, + end_line: 10, + language: "typescript", + location: "src/auth/login.ts:1-10", + }, + score: 0.95, + }, + { + chunk: { + 
content: "code in src/utils", + file_path: "src/utils/helper.ts", + start_line: 5, + end_line: 15, + language: "typescript", + location: "src/utils/helper.ts:5-15", + }, + score: 0.8, + }, + { + chunk: { + content: "code in root", + file_path: "README.md", + start_line: 1, + end_line: 5, + language: "markdown", + location: "README.md:1-5", + }, + score: 0.6, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test", "src/auth") + + // Only the src/auth result should pass the filter + expect(results).toHaveLength(1) + expect(results[0].payload?.filePath).toBe("/workspace/src/auth/login.ts") + }) + + it("should not filter results when no directoryPrefix is provided", async () => { + const mockResults = [ + { + chunk: { + content: "code in src/auth", + file_path: "src/auth/login.ts", + start_line: 1, + end_line: 10, + language: "typescript", + location: "src/auth/login.ts:1-10", + }, + score: 0.95, + }, + { + chunk: { + content: "code in src/utils", + file_path: "src/utils/helper.ts", + start_line: 5, + end_line: 15, + language: "typescript", + location: "src/utils/helper.ts:5-15", + }, + score: 0.8, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + // All results should be returned + expect(results).toHaveLength(2) + }) + + it("should return empty array on search error and log telemetry", async () => { + mockCli.search.mockRejectedValue(new Error("Search failed")) + + const results = await provider.searchIndex("test") + + expect(results).toEqual([]) + expect(TelemetryService.instance.captureEvent).toHaveBeenCalledWith( + TelemetryEventName.CODE_INDEX_ERROR, + expect.objectContaining({ + location: "SembleProvider.searchIndex", + }), + ) + }) + + it("should return empty array when in Error state", async () => { + ;(isSembleSupportedPlatform as any).mockReturnValue(false) + const errorProvider = new SembleProvider("/workspace", mockContext, 
mockStateManager) + await errorProvider.initialize() + ;(isSembleSupportedPlatform as any).mockReturnValue(true) // reset for other tests + const results = await errorProvider.searchIndex("test") + expect(results).toEqual([]) + }) + }) + + describe("clearIndexData", () => { + it("should reset state to Standby", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + await provider.initialize() + + await provider.clearIndexData() + + expect(provider.state).toBe("Standby") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Standby", + "Semble provider reset. On-disk cache remains until next rebuild.", + ) + }) + }) + + describe("dispose", () => { + it("should reset initialization state", async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + await provider.initialize() + + provider.dispose() + + // After dispose, searchIndex should return empty array + const results = await provider.searchIndex("test") + expect(results).toEqual([]) + }) + }) + + describe("_convertResults edge cases", () => { + beforeEach(async () => { + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + await provider.initialize() + }) + + it("should handle results with null content using empty string fallback", async () => { + const mockResults = [ + { + chunk: { + content: null, + file_path: "src/file.ts", + start_line: null, + end_line: null, + language: null, + location: "", + }, + score: 0.6, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + expect(results).toHaveLength(1) + expect(results[0].payload?.codeChunk).toBe("") + expect(results[0].payload?.startLine).toBe(0) + expect(results[0].payload?.endLine).toBe(0) + }) + + it("should handle results with undefined content fields", async () => { + const mockResults = [ + { + chunk: { + content: undefined, + file_path: "src/file.ts", + start_line: undefined, + end_line: undefined, + language: undefined, + 
location: "", + }, + score: 0.5, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + expect(results).toHaveLength(1) + expect(results[0].payload?.codeChunk).toBe("") + expect(results[0].payload?.startLine).toBe(0) + expect(results[0].payload?.endLine).toBe(0) + }) + + it("should normalize backslashes in file paths", async () => { + const mockResults = [ + { + chunk: { + content: "code", + file_path: "src\\nested\\file.ts", + start_line: 1, + end_line: 10, + language: "typescript", + location: "", + }, + score: 0.8, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + const results = await provider.searchIndex("test") + + expect(results).toHaveLength(1) + expect(results[0].payload?.filePath).not.toContain("\\") + expect(results[0].payload?.filePath).toContain("/") + }) + + it("should always join file paths against workspace root, even with directoryPrefix", async () => { + const mockResults = [ + { + chunk: { + content: "code", + file_path: "src/file.ts", + start_line: 1, + end_line: 5, + language: "typescript", + location: "", + }, + score: 0.9, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + // Even with a directoryPrefix, file paths are joined against workspace root + const results = await provider.searchIndex("test", "src") + + expect(results[0].payload?.filePath).toBe("/workspace/src/file.ts") + }) + + it("should assign sequential semble-N IDs to results", async () => { + const mockResults = [ + { + chunk: { + content: "a", + file_path: "a.ts", + start_line: 1, + end_line: 2, + language: "ts", + location: "", + }, + score: 0.9, + }, + { + chunk: { + content: "b", + file_path: "b.ts", + start_line: 1, + end_line: 2, + language: "ts", + location: "", + }, + score: 0.8, + }, + { + chunk: { + content: "c", + file_path: "c.ts", + start_line: 1, + end_line: 2, + language: "ts", + location: "", + }, + score: 0.7, + }, + ] + + mockCli.search.mockResolvedValue(mockResults) + + 
const results = await provider.searchIndex("test") + + expect(results[0].id).toBe("semble-0") + expect(results[1].id).toBe("semble-1") + expect(results[2].id).toBe("semble-2") + }) + }) + + describe("initialize error edge cases", () => { + it("should set Error state when download returns no path (undefined)", async () => { + ;(downloadSemble as any).mockResolvedValue(undefined) + + await provider.initialize() + + expect(provider.state).toBe("Error") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Error", + expect.stringContaining("Failed to download semble"), + ) + }) + + it("should set Error state with default message when checkInstalled returns no error string", async () => { + mockCli.checkInstalled.mockResolvedValue({ + installed: false, + error: undefined, + }) + + await provider.initialize() + + expect(provider.state).toBe("Error") + expect(mockStateManager.setSystemState).toHaveBeenCalledWith( + "Error", + expect.stringContaining("Semble binary is not functional"), + ) + }) + }) + + describe("custom config options", () => { + it("should pass custom topK to CLI search", async () => { + const customProvider = new SembleProvider("/workspace", mockContext, mockStateManager, { + topK: 5, + }) + + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + await customProvider.initialize() + mockCli.search.mockResolvedValue([]) + + await customProvider.searchIndex("test") + + expect(mockCli.search).toHaveBeenCalledWith("test", "/workspace", { + topK: 5, + content: "code", + }) + }) + + it("should pass custom content type to CLI search", async () => { + const customProvider = new SembleProvider("/workspace", mockContext, mockStateManager, { + content: "all", + }) + + mockCli.checkInstalled.mockResolvedValue({ installed: true }) + await customProvider.initialize() + mockCli.search.mockResolvedValue([]) + + await customProvider.searchIndex("test") + + expect(mockCli.search).toHaveBeenCalledWith("test", "/workspace", { + topK: 10, + content: 
"all", + }) + }) + }) +}) diff --git a/src/services/code-index/semble/__tests__/semble-cli.spec.ts b/src/services/code-index/semble/__tests__/semble-cli.spec.ts new file mode 100644 index 0000000000..5f692969f7 --- /dev/null +++ b/src/services/code-index/semble/__tests__/semble-cli.spec.ts @@ -0,0 +1,379 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { EventEmitter } from "events" +import { SembleCLI } from "../semble-cli" + +// Mock spawn +const mockSpawn = vi.fn() + +vi.mock("child_process", () => ({ + spawn: (...args: any[]) => mockSpawn(...args), +})) + +/** + * Helper to create a fake child process that emits stdout/stderr and closes. + */ +function createMockProcess(stdout: string, stderr: string, exitCode: number) { + const proc = new EventEmitter() as any + proc.stdout = new EventEmitter() + proc.stderr = new EventEmitter() + + // Schedule data emission and close on next tick + setImmediate(() => { + if (stdout) proc.stdout.emit("data", Buffer.from(stdout)) + if (stderr) proc.stderr.emit("data", Buffer.from(stderr)) + proc.emit("close", exitCode) + }) + + return proc +} + +/** + * Helper to create a mock process that emits an error. 
+ */ +function createErrorProcess(errorMessage: string) { + const proc = new EventEmitter() as any + proc.stdout = new EventEmitter() + proc.stderr = new EventEmitter() + + setImmediate(() => { + proc.emit("error", new Error(errorMessage)) + }) + + return proc +} + +describe("SembleCLI", () => { + let cli: SembleCLI + + beforeEach(() => { + vi.clearAllMocks() + cli = new SembleCLI("semble") + }) + + describe("constructor", () => { + it("should accept a path to the semble executable", () => { + const customCli = new SembleCLI("/usr/local/bin/semble") + expect(customCli).toBeDefined() + }) + }) + + describe("checkInstalled", () => { + it("should return installed: true when --help succeeds", async () => { + mockSpawn.mockReturnValueOnce(createMockProcess("usage: semble ...", "", 0)) + + const result = await cli.checkInstalled() + + expect(result).toEqual({ installed: true }) + expect(mockSpawn).toHaveBeenCalledWith("semble", ["--help"], expect.objectContaining({ shell: false })) + }) + + it("should return installed: false when semble --help fails", async () => { + mockSpawn.mockReturnValueOnce(createMockProcess("", "semble: command not found", 127)) + + const result = await cli.checkInstalled() + + expect(result.installed).toBe(false) + expect(result.error).toContain("semble: command not found") + }) + + it("should return installed: false on spawn error", async () => { + mockSpawn.mockReturnValueOnce(createErrorProcess("spawn ENOENT")) + + const result = await cli.checkInstalled() + + expect(result.installed).toBe(false) + expect(result.error).toContain("spawn ENOENT") + }) + }) + + describe("search", () => { + it("should spawn with array args (no shell)", async () => { + const jsonResponse = JSON.stringify({ query: "auth", results: [] }) + mockSpawn.mockReturnValue(createMockProcess(jsonResponse, "", 0)) + + await cli.search("authentication", "/path/to/repo") + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["search", "authentication", "/path/to/repo", "-k", 
"10"], + expect.objectContaining({ shell: false }), + ) + }) + + it("should pass special characters safely in query (no shell interpretation)", async () => { + const jsonResponse = JSON.stringify({ query: "test", results: [] }) + mockSpawn.mockReturnValue(createMockProcess(jsonResponse, "", 0)) + + await cli.search('test $(rm -rf /) `whoami` "injection"', "/repo") + + // With spawn (no shell), these are just string args — not interpreted + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["search", 'test $(rm -rf /) `whoami` "injection"', "/repo", "-k", "10"], + expect.objectContaining({ shell: false }), + ) + }) + + it("should build correct args with custom topK", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "test", results: [] }), "", 0)) + + await cli.search("test", "/repo", { topK: 5 }) + + expect(mockSpawn).toHaveBeenCalledWith("semble", ["search", "test", "/repo", "-k", "5"], expect.any(Object)) + }) + + it("should add --content flag for non-default content types", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "test", results: [] }), "", 0)) + + await cli.search("test", "/repo", { content: "all" }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["search", "test", "/repo", "-k", "10", "--content", "all"], + expect.any(Object), + ) + }) + + it("should not add --content flag for code (default)", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "test", results: [] }), "", 0)) + + await cli.search("test", "/repo", { content: "code" }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["search", "test", "/repo", "-k", "10"], + expect.any(Object), + ) + }) + + it("should throw error when semble search fails", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "Error: something went wrong", 1)) + + await expect(cli.search("test", "/repo")).rejects.toThrow("Semble search failed") + }) + }) + + describe("findRelated", () 
=> { + it("should build correct args with default options", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "related", results: [] }), "", 0)) + + await cli.findRelated("src/auth.ts", 42, "/repo") + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["find-related", "src/auth.ts", "42", "/repo", "-k", "10"], + expect.any(Object), + ) + }) + + it("should build correct args with custom topK and content", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "related", results: [] }), "", 0)) + + await cli.findRelated("src/auth.ts", 42, "/repo", { topK: 3, content: "all" }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["find-related", "src/auth.ts", "42", "/repo", "-k", "3", "--content", "all"], + expect.any(Object), + ) + }) + + it("should not add --content flag for code (default)", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "related", results: [] }), "", 0)) + + await cli.findRelated("src/auth.ts", 42, "/repo", { content: "code" }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["find-related", "src/auth.ts", "42", "/repo", "-k", "10"], + expect.any(Object), + ) + }) + + it("should add --content flag for docs content type", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "related", results: [] }), "", 0)) + + await cli.findRelated("src/auth.ts", 42, "/repo", { content: "docs" }) + + expect(mockSpawn).toHaveBeenCalledWith( + "semble", + ["find-related", "src/auth.ts", "42", "/repo", "-k", "10", "--content", "docs"], + expect.any(Object), + ) + }) + + it("should throw error when semble find-related fails", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "Error: no chunk found", 1)) + + await expect(cli.findRelated("src/auth.ts", 42, "/repo")).rejects.toThrow("Semble find-related failed") + }) + + it("should throw with message when find-related fails with empty stderr", async () => { 
+ mockSpawn.mockReturnValue(createMockProcess("", "", 1)) + + await expect(cli.findRelated("src/auth.ts", 42, "/repo")).rejects.toThrow("Semble find-related failed") + }) + + it("should parse results from find-related", async () => { + const jsonResponse = { + query: "related", + results: [ + { + chunk: { + content: "related code", + file_path: "src/related.ts", + start_line: 1, + end_line: 10, + language: "typescript", + location: "src/related.ts:1-10", + }, + score: 0.85, + }, + ], + } + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify(jsonResponse), "", 0)) + + const results = await cli.findRelated("src/auth.ts", 42, "/repo") + + expect(results).toHaveLength(1) + expect(results[0].chunk.file_path).toBe("src/related.ts") + expect(results[0].score).toBe(0.85) + }) + }) + + describe("_parseOutput (via search)", () => { + it("should parse v0.3.0+ JSON format with nested chunk", async () => { + const jsonResponse = { + query: "authentication", + results: [ + { + chunk: { + content: "function authenticate() {}", + file_path: "src/auth.ts", + start_line: 10, + end_line: 25, + language: "typescript", + location: "src/auth.ts:10-25", + }, + score: 0.92, + }, + { + chunk: { + content: "export function login() {}", + file_path: "src/login.ts", + start_line: 5, + end_line: 15, + language: "typescript", + location: "src/login.ts:5-15", + }, + score: 0.78, + }, + ], + } + + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify(jsonResponse), "", 0)) + + const results = await cli.search("authentication", "/repo") + + expect(results).toHaveLength(2) + expect(results[0].chunk.file_path).toBe("src/auth.ts") + expect(results[0].chunk.start_line).toBe(10) + expect(results[0].chunk.end_line).toBe(25) + expect(results[0].chunk.content).toBe("function authenticate() {}") + expect(results[0].score).toBe(0.92) + expect(results[1].chunk.file_path).toBe("src/login.ts") + expect(results[1].score).toBe(0.78) + }) + + it("should handle empty results response", async () => { + 
mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ query: "nonexistent", results: [] }), "", 0)) + + const results = await cli.search("nonexistent", "/repo") + + expect(results).toEqual([]) + }) + + it("should handle error response from semble", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ error: "No results found." }), "", 0)) + + const results = await cli.search("nonexistent", "/repo") + + expect(results).toEqual([]) + }) + + it("should handle empty stdout", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "", 0)) + + const results = await cli.search("test", "/repo") + + expect(results).toEqual([]) + }) + + it("should handle whitespace-only stdout", async () => { + mockSpawn.mockReturnValue(createMockProcess(" \n \n ", "", 0)) + + const results = await cli.search("test", "/repo") + + expect(results).toEqual([]) + }) + + it("should handle non-JSON output gracefully", async () => { + mockSpawn.mockReturnValue(createMockProcess("Some plain text output that is not JSON", "", 0)) + + const results = await cli.search("test", "/repo") + + expect(results).toEqual([]) + }) + + it("should handle flat array format (older semble format)", async () => { + const flatArray = [ + { + chunk: { + content: "old format result", + file_path: "src/old.ts", + start_line: 1, + end_line: 5, + language: "typescript", + location: "src/old.ts:1-5", + }, + score: 0.7, + }, + ] + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify(flatArray), "", 0)) + + const results = await cli.search("test", "/repo") + + expect(results).toHaveLength(1) + expect(results[0].chunk.file_path).toBe("src/old.ts") + expect(results[0].score).toBe(0.7) + }) + + it("should return empty array for unexpected JSON structure", async () => { + mockSpawn.mockReturnValue(createMockProcess(JSON.stringify({ unexpected: "format" }), "", 0)) + + const results = await cli.search("test", "/repo") + + expect(results).toEqual([]) + }) + }) + + describe("search error 
handling", () => { + it("should include stderr in error message when available", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "Permission denied: /repo", 1)) + + await expect(cli.search("test", "/repo")).rejects.toThrow("Permission denied: /repo") + }) + + it("should fall back to process exit message when stderr is empty", async () => { + mockSpawn.mockReturnValue(createMockProcess("", "", 1)) + + await expect(cli.search("test", "/repo")).rejects.toThrow("Semble search failed") + }) + + it("should handle spawn error during search", async () => { + mockSpawn.mockReturnValue(createErrorProcess("EACCES: permission denied")) + + await expect(cli.search("test", "/repo")).rejects.toThrow("EACCES: permission denied") + }) + }) +}) diff --git a/src/services/code-index/semble/__tests__/semble-downloader.spec.ts b/src/services/code-index/semble/__tests__/semble-downloader.spec.ts new file mode 100644 index 0000000000..23add66c53 --- /dev/null +++ b/src/services/code-index/semble/__tests__/semble-downloader.spec.ts @@ -0,0 +1,640 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import * as fs from "fs/promises" +import * as path from "path" +import { EventEmitter } from "events" + +// Mock crypto — verifyChecksum reads the archive file (mocked via createReadStream) +// and computes a SHA-256. We make digest() dynamically return the expected checksum +// for the current process.platform/arch so verification always passes in unit tests. 
+const CHECKSUMS: Record<string, string> = {
+	"linux-x64": "2bd4117dbd1ff7a26ed5ef44dad8d43162a4b9f431ec0bcc9dd2f9c6f5952e28",
+	"linux-arm64": "177d14f41d3272594844a2635d59d97ad20400868a874a59169fd26a868c32a5",
+	"darwin-arm64": "9130f447ff2c21803853a9aee58268f0e05134326384ac23d8b74ed22905e118",
+	"win32-x64": "c8ae86f3703675e356824e08cf79c8a20c41c602296d2a5bff15bf35d762a46b",
+}
+vi.mock("crypto", () => ({
+	createHash: vi.fn(() => ({
+		update: vi.fn().mockReturnThis(),
+		digest: vi.fn(() => CHECKSUMS[`${process.platform}-${process.arch}`] ?? "no-match"),
+	})),
+}))
+
+// Mock fs/promises — access and readFile have no default result; each test sets them to model binary/version-file presence
+vi.mock("fs/promises", () => ({
+	mkdir: vi.fn().mockResolvedValue(undefined),
+	access: vi.fn(),
+	chmod: vi.fn().mockResolvedValue(undefined),
+	unlink: vi.fn().mockResolvedValue(undefined),
+	rm: vi.fn().mockResolvedValue(undefined),
+	readFile: vi.fn(),
+	writeFile: vi.fn().mockResolvedValue(undefined),
+}))
+
+// Mock fs (createWriteStream returns the shared mockWriteStream; createReadStream emits fake bytes for checksum verification)
+const mockWriteStream = {
+	on: vi.fn(),
+	close: vi.fn(),
+}
+vi.mock("fs", () => ({
+	createWriteStream: vi.fn(() => mockWriteStream),
+	createReadStream: vi.fn(() => {
+		const { EventEmitter } = require("events")
+		const stream = new EventEmitter()
+		setImmediate(() => {
+			stream.emit("data", Buffer.from("fake-archive-content"))
+			stream.emit("end")
+		})
+		return stream
+	}),
+}))
+
+// Mock https — fresh emitters per invocation to avoid listener leaks across tests
+let mockRequest: any
+let mockResponse: any
+
+vi.mock("https", () => ({
+	get: vi.fn((_url: string, callback: (res: any) => void) => {
+		mockRequest = Object.assign(new EventEmitter(), { setTimeout: vi.fn() })
+		mockResponse = Object.assign(new EventEmitter(), {
+			statusCode: 200,
+			headers: {},
+			pipe: vi.fn(),
+			destroy: vi.fn(),
+		})
+		setImmediate(() => callback(mockResponse))
+		return mockRequest
+	}),
+}))
+
+// Mock child_process spawn for archive extraction (tar, or PowerShell Expand-Archive on Windows)
+const mockExtractProcess = new EventEmitter() as any
+mockExtractProcess.stderr = new EventEmitter() + +vi.mock("child_process", () => ({ + spawn: vi.fn(() => { + // Simulate successful extraction + setImmediate(() => mockExtractProcess.emit("close", 0)) + return mockExtractProcess + }), +})) + +import { + isSembleSupportedPlatform, + getSembleSupportedPlatforms, + downloadSemble, + getSembleBinaryPath, +} from "../semble-downloader" +import * as https from "https" +import { spawn } from "child_process" + +describe("semble-downloader", () => { + beforeEach(() => { + vi.clearAllMocks() + mockWriteStream.on = vi.fn() + mockWriteStream.close = vi.fn() + }) + + describe("isSembleSupportedPlatform", () => { + it("should return true for linux-x64", () => { + expect(isSembleSupportedPlatform("linux", "x64")).toBe(true) + }) + + it("should return true for linux-arm64", () => { + expect(isSembleSupportedPlatform("linux", "arm64")).toBe(true) + }) + + it("should return true for darwin-arm64", () => { + expect(isSembleSupportedPlatform("darwin", "arm64")).toBe(true) + }) + + it("should return true for win32-x64", () => { + expect(isSembleSupportedPlatform("win32", "x64")).toBe(true) + }) + + it("should return false for darwin-x64 (Intel Mac not supported)", () => { + expect(isSembleSupportedPlatform("darwin", "x64")).toBe(false) + }) + + it("should return false for win32-arm64", () => { + expect(isSembleSupportedPlatform("win32", "arm64")).toBe(false) + }) + + it("should return false for freebsd-x64", () => { + expect(isSembleSupportedPlatform("freebsd", "x64")).toBe(false) + }) + + it("should use process.platform and process.arch when no args provided", () => { + const result = isSembleSupportedPlatform() + expect(typeof result).toBe("boolean") + }) + }) + + describe("getSembleSupportedPlatforms", () => { + it("should return all supported platform-arch combinations", () => { + const platforms = getSembleSupportedPlatforms() + + expect(platforms).toContain("linux-x64") + expect(platforms).toContain("linux-arm64") + 
expect(platforms).toContain("darwin-arm64") + expect(platforms).toContain("win32-x64") + expect(platforms).toHaveLength(4) + }) + }) + + describe("downloadSemble", () => { + it("should return undefined on unsupported platform", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "freebsd", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + try { + const result = await downloadSemble("/some/dir") + expect(result).toBeUndefined() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should return existing binary path if already extracted", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "darwin", configurable: true }) + Object.defineProperty(process, "arch", { value: "arm64", configurable: true }) + + // fs.access resolves => file exists + ;(fs.access as any).mockResolvedValue(undefined) + // Version file matches current version + ;(fs.readFile as any).mockResolvedValue("v0.3.1") + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + expect(fs.mkdir).toHaveBeenCalledWith("/storage", { recursive: true }) + expect(fs.chmod).toHaveBeenCalledWith(path.join("/storage", "semble", "semble"), 0o755) + // Should NOT attempt to download + expect(https.get).not.toHaveBeenCalled() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should download 
and extract archive when not present", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // fs.access rejects => file not present + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + // No version file exists + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download: pipe is called, then "finish" fires + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + expect(https.get).toHaveBeenCalledWith( + expect.stringContaining("semble-linux-x64-fast.tar.gz"), + expect.any(Function), + ) + // Should call tar for extraction + expect(spawn).toHaveBeenCalledWith( + "tar", + [ + "-xzf", + path.join("/storage", "semble-linux-x64-fast.tar.gz"), + "-C", + path.join("/storage", "semble"), + ], + expect.any(Object), + ) + expect(fs.chmod).toHaveBeenCalledWith(path.join("/storage", "semble", "semble"), 0o755) + // Version file should be written + expect(fs.writeFile).toHaveBeenCalledWith( + path.join("/storage", "semble", ".semble-version"), + "v0.3.1", + "utf-8", + ) + // Archive should be cleaned up + expect(fs.unlink).toHaveBeenCalledWith(path.join("/storage", "semble-linux-x64-fast.tar.gz")) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should not chmod on windows", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = 
Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "win32", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // fs.access resolves => file exists + ;(fs.access as any).mockResolvedValue(undefined) + // Version file matches + ;(fs.readFile as any).mockResolvedValue("v0.3.1") + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble.exe")) + expect(fs.chmod).not.toHaveBeenCalled() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should throw and clean up on download failure", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "arm64", configurable: true }) + + // fs.access rejects => file not present + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + // No version file + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate HTTP error response + ;(https.get as any).mockImplementation((_url: string, callback: (res: any) => void) => { + const res = Object.assign(new EventEmitter(), { + statusCode: 404, + headers: {}, + pipe: vi.fn(), + destroy: vi.fn(), + }) + setImmediate(() => callback(res)) + const req = Object.assign(new EventEmitter(), { setTimeout: vi.fn() }) + return req + }) + + try { + await expect(downloadSemble("/storage")).rejects.toThrow("Failed to download semble") + expect(fs.unlink).toHaveBeenCalledWith(path.join("/storage", "semble-linux-arm64-fast.tar.gz")) + expect(fs.rm).toHaveBeenCalledWith(path.join("/storage", "semble"), { recursive: true, force: true }) 
+ } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should follow redirects", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "darwin", configurable: true }) + Object.defineProperty(process, "arch", { value: "arm64", configurable: true }) + + // fs.access rejects => file not present + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + // No version file + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) + + // First call returns a redirect, second call returns 200 + let callCount = 0 + ;(https.get as any).mockImplementation((_url: string, callback: (res: any) => void) => { + callCount++ + const res = new EventEmitter() as any + if (callCount === 1) { + res.statusCode = 302 + res.headers = { location: "https://cdn.example.com/semble-macos-arm64-fast.tar.gz" } + res.destroy = vi.fn() + } else { + res.statusCode = 200 + res.headers = {} + res.pipe = vi.fn() + res.destroy = vi.fn() + } + setImmediate(() => callback(res)) + + const req = new EventEmitter() as any + req.setTimeout = vi.fn() + return req + }) + + // Simulate successful download on the second response + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + expect(https.get).toHaveBeenCalledTimes(2) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + }) + + describe("getSembleBinaryPath", () => { + it("should return path when binary exists", async () 
=> { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + ;(fs.access as any).mockResolvedValue(undefined) + + try { + const result = await getSembleBinaryPath("/storage") + expect(result).toBe(path.join("/storage", "semble", "semble")) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should return undefined when binary does not exist", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + try { + const result = await getSembleBinaryPath("/storage") + expect(result).toBeUndefined() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should return undefined on unsupported platform", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "freebsd", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + try { + const result = await getSembleBinaryPath("/storage") + expect(result).toBeUndefined() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", 
originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should use correct binary name for windows", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "win32", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + ;(fs.access as any).mockResolvedValue(undefined) + + try { + const result = await getSembleBinaryPath("/storage") + expect(result).toBe(path.join("/storage", "semble", "semble.exe")) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + }) + + describe("downloadSemble - zip extraction on Windows", () => { + it("should use PowerShell Expand-Archive on Windows", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "win32", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // fs.access rejects => file not present, triggering download + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + // No version file + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble.exe")) + // Should call PowerShell for zip extraction + expect(spawn).toHaveBeenCalledWith( + "powershell", + expect.arrayContaining(["-NoProfile", "-Command", 
expect.stringContaining("Expand-Archive")]), + expect.any(Object), + ) + // Should NOT call chmod on windows + expect(fs.chmod).not.toHaveBeenCalled() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + }) + + describe("downloadSemble - error handling edge cases", () => { + it("should not throw when archive cleanup fails after successful extraction", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // fs.access rejects => file not present + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + // No version file + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + // Archive cleanup fails but should not throw (only archive removal after extraction) + ;(fs.unlink as any).mockRejectedValue(new Error("unlink cleanup failed")) + + try { + const result = await downloadSemble("/storage") + // Should still succeed — archive cleanup failure is ignored + expect(result).toBe(path.join("/storage", "semble", "semble")) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + }) + + describe("downloadSemble - version tracking", () => { + it("should re-download when installed version differs from SEMBLE_VERSION", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = 
Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // Version file has an old version + ;(fs.readFile as any).mockResolvedValue("v0.2.0") + // Binary doesn't matter — version mismatch forces re-download + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + // Should remove old installation + expect(fs.rm).toHaveBeenCalledWith(path.join("/storage", "semble"), { + recursive: true, + force: true, + }) + // Should download the new version + expect(https.get).toHaveBeenCalledWith(expect.stringContaining("v0.3.1"), expect.any(Function)) + // Should write the new version file + expect(fs.writeFile).toHaveBeenCalledWith( + path.join("/storage", "semble", ".semble-version"), + "v0.3.1", + "utf-8", + ) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should skip download when installed version matches SEMBLE_VERSION and binary exists", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // Version matches + ;(fs.readFile as any).mockResolvedValue("v0.3.1") + // Binary exists + ;(fs.access as any).mockResolvedValue(undefined) + + try { + const result = await downloadSemble("/storage") + + 
expect(result).toBe(path.join("/storage", "semble", "semble")) + // Should NOT download + expect(https.get).not.toHaveBeenCalled() + // Should NOT remove the extract dir + expect(fs.rm).not.toHaveBeenCalled() + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should re-download when version matches but binary is missing", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // Version matches + ;(fs.readFile as any).mockResolvedValue("v0.3.1") + // But binary is missing + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + // Should download since binary was missing + expect(https.get).toHaveBeenCalled() + // Should write version file again + expect(fs.writeFile).toHaveBeenCalledWith( + path.join("/storage", "semble", ".semble-version"), + "v0.3.1", + "utf-8", + ) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + + it("should download when no version file exists (first install)", async () => { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform") + const originalArch = Object.getOwnPropertyDescriptor(process, "arch") + + Object.defineProperty(process, "platform", { value: "linux", configurable: 
true }) + Object.defineProperty(process, "arch", { value: "x64", configurable: true }) + + // No version file + ;(fs.readFile as any).mockRejectedValue(new Error("ENOENT")) + // No binary + ;(fs.access as any).mockRejectedValue(new Error("ENOENT")) + + // Simulate successful download + mockWriteStream.on.mockImplementation((event: string, cb: () => void) => { + if (event === "finish") { + setImmediate(cb) + } + }) + + try { + const result = await downloadSemble("/storage") + + expect(result).toBe(path.join("/storage", "semble", "semble")) + expect(https.get).toHaveBeenCalled() + // Should NOT try to rm the old dir (no previous version) + expect(fs.rm).not.toHaveBeenCalledWith( + path.join("/storage", "semble"), + expect.objectContaining({ recursive: true }), + ) + // Should write version file + expect(fs.writeFile).toHaveBeenCalledWith( + path.join("/storage", "semble", ".semble-version"), + "v0.3.1", + "utf-8", + ) + } finally { + if (originalPlatform) Object.defineProperty(process, "platform", originalPlatform) + if (originalArch) Object.defineProperty(process, "arch", originalArch) + } + }) + }) +}) diff --git a/src/services/code-index/semble/index.ts b/src/services/code-index/semble/index.ts new file mode 100644 index 0000000000..e63115e076 --- /dev/null +++ b/src/services/code-index/semble/index.ts @@ -0,0 +1,17 @@ +export { SembleCLI } from "./semble-cli" +export { SembleProvider } from "./provider" +export { + isSembleSupportedPlatform, + getSembleSupportedPlatforms, + downloadSemble, + getSembleBinaryPath, +} from "./semble-downloader" +export type { + ISembleProvider, + SembleSearchResult, + SembleChunk, + SembleCheckResult, + SembleConfig, + SembleContentType, +} from "./types" +export { SEMBLE_DEFAULTS } from "./types" diff --git a/src/services/code-index/semble/provider.ts b/src/services/code-index/semble/provider.ts new file mode 100644 index 0000000000..a429715954 --- /dev/null +++ b/src/services/code-index/semble/provider.ts @@ -0,0 +1,241 @@ +import 
/**
 * Orchestrates code search via the semble CLI.
 *
 * Semble indexes on-the-fly with each search call — there is no separate
 * "indexing" step. The provider downloads the semble binary on first use,
 * verifies it runs, and then delegates queries to `semble search`.
 *
 * When `embedderProvider === "semble"`, the CodeIndexManager delegates
 * to this provider instead of the ServiceFactory → orchestrator pipeline.
 */
export class SembleProvider implements ISembleProvider {
	private cli!: SembleCLI
	private readonly workspacePath: string
	private readonly config: SembleConfig
	private readonly stateManager: CodeIndexStateManager
	private readonly context: vscode.ExtensionContext

	private _state: IndexingState = "Standby"
	private _isInitialized = false

	/**
	 * @param workspacePath Root directory handed to `semble search`.
	 * @param context Extension context; its global storage directory hosts the binary.
	 * @param stateManager Receives every state transition for display.
	 * @param options Optional overrides for result count and content type.
	 */
	constructor(
		workspacePath: string,
		context: vscode.ExtensionContext,
		stateManager: CodeIndexStateManager,
		options?: { topK?: number; content?: SembleContentType },
	) {
		this.workspacePath = workspacePath
		this.context = context
		this.stateManager = stateManager

		this.config = {
			topK: options?.topK ?? SEMBLE_DEFAULTS.DEFAULT_TOP_K,
			content: options?.content ?? SEMBLE_DEFAULTS.DEFAULT_CONTENT,
		}
	}

	/** Current provider state as last reported to the state manager. */
	get state(): IndexingState {
		return this._state
	}

	/**
	 * Initializes the provider: downloads semble, then validates that it runs.
	 *
	 * Never throws. Every failure is reported through the state manager as
	 * "Error" and leaves the provider uninitialized, so a later call retries.
	 */
	async initialize(): Promise<void> {
		if (this._isInitialized) {
			return
		}

		// Check platform support
		if (!isSembleSupportedPlatform()) {
			this._state = "Error"
			this.stateManager.setSystemState(
				"Error",
				`Semble is not supported on this platform (${process.platform}-${process.arch}).`,
			)
			console.error(`[SembleProvider] Unsupported platform: ${process.platform}-${process.arch}`)
			return
		}

		// Download semble binary
		try {
			this.stateManager.setSystemState("Indexing", "Downloading semble binary...")
			const storageDir = this.context.globalStorageUri.fsPath
			const binaryPath = await downloadSemble(storageDir)
			if (!binaryPath) {
				throw new Error("Download returned no path")
			}
			this.cli = new SembleCLI(binaryPath)
		} catch (error: unknown) {
			const reason = SembleProvider._describeError(error)
			this._state = "Error"
			this.stateManager.setSystemState("Error", `Failed to download semble: ${reason}`)
			console.error("[SembleProvider] Download failed:", reason)
			return
		}

		// Verify the binary works
		const checkResult = await this.cli.checkInstalled()

		if (!checkResult.installed) {
			const errorMsg = checkResult.error || "Semble binary is not functional"
			this._state = "Error"
			this.stateManager.setSystemState("Error", `Semble check failed: ${errorMsg}`)
			console.error("[SembleProvider] Semble check failed:", errorMsg)
			return
		}

		console.log("[SembleProvider] Semble found and ready.")

		// Semble indexes on-the-fly, so we mark as "Indexed" (ready for search)
		this._state = "Indexed"
		this.stateManager.setSystemState("Indexed", "Semble is ready. Searches index on-the-fly.")

		this._isInitialized = true
	}

	/**
	 * Starts indexing. Since semble indexes on-the-fly with each search,
	 * this only makes sure the provider is initialized and marks it ready.
	 */
	async startIndexing(): Promise<void> {
		if (!this._isInitialized) {
			await this.initialize()
		}

		if (this._state === "Error") {
			return
		}

		// Semble indexes on-the-fly — no separate indexing step needed.
		this._state = "Indexed"
		this.stateManager.setSystemState("Indexed", "Semble is ready. Searches index on-the-fly.")
	}

	/**
	 * Stops indexing (no-op — semble has no background indexing process).
	 */
	stopIndexing(): void {
		// No-op: semble indexes on-the-fly per search call
	}

	/**
	 * Searches the codebase using `semble search`.
	 *
	 * Always searches the full workspace root to avoid creating separate
	 * Semble cache directories for each subdirectory. When `directoryPrefix`
	 * is provided, results are filtered post-search to files inside that
	 * directory. The prefix may be relative to the workspace or absolute, and
	 * may carry a trailing separator.
	 *
	 * @returns Matching chunks, or an empty array when the provider is not
	 *   ready or the search fails (failures are logged and sent to telemetry).
	 */
	async searchIndex(query: string, directoryPrefix?: string): Promise<VectorStoreSearchResult[]> {
		if (!this._isInitialized) {
			console.warn("[SembleProvider] searchIndex called before initialization")
			return []
		}

		if (this._state === "Error") {
			return []
		}

		try {
			// Always search the full workspace to maintain a single Semble cache.
			// Semble creates a separate cache directory per path (SHA-256 of the
			// resolved absolute path), so passing subdirectories would create
			// redundant indexes and waste disk space.
			console.log(`[SembleProvider] Searching for "${query}" in ${this.workspacePath}`)
			const results = await this.cli.search(query, this.workspacePath, {
				topK: this.config.topK,
				content: this.config.content,
			})

			let converted = this._convertResults(results, this.workspacePath)

			// Filter results to the requested directory prefix, if any.
			if (directoryPrefix) {
				const normalizedPrefix = this._normalizeDirectoryPrefix(directoryPrefix)
				converted = converted.filter((r) => {
					const filePath = (r.payload?.filePath ?? "").replace(/\\/g, "/")
					return filePath === normalizedPrefix || filePath.startsWith(normalizedPrefix + "/")
				})
				console.log(
					`[SembleProvider] Filtered to "${directoryPrefix}": ${converted.length} of ${results.length} results`,
				)
			}

			console.log(
				`[SembleProvider] Search returned ${converted.length} results (raw: ${results.length}). Sample path: ${converted[0]?.payload?.filePath ?? "none"}`,
			)
			return converted
		} catch (error: unknown) {
			const errorMessage = SembleProvider._describeError(error)
			console.error("[SembleProvider] Search failed:", errorMessage)

			TelemetryService.instance.captureEvent(TelemetryEventName.CODE_INDEX_ERROR, {
				error: errorMessage,
				stack: error instanceof Error ? error.stack : undefined,
				location: "SembleProvider.searchIndex",
			})

			return []
		}
	}

	/**
	 * Resets the provider state to "Standby". Semble keeps its own on-disk
	 * cache; this method does not delete it.
	 */
	async clearIndexData(): Promise<void> {
		this._state = "Standby"
		this.stateManager.setSystemState("Standby", "Semble provider reset. On-disk cache remains until next rebuild.")
	}

	/**
	 * Disposes resources. The next `initialize()` call sets the provider up again.
	 */
	dispose(): void {
		this._isInitialized = false
	}

	// --- Private Helpers ---

	/**
	 * Turns a directory filter into an absolute, forward-slash path without a
	 * trailing separator, so it can be compared against converted result paths.
	 * `path.join` keeps a trailing separator, which is why it is stripped here.
	 */
	private _normalizeDirectoryPrefix(directoryPrefix: string): string {
		const absolute = path.isAbsolute(directoryPrefix)
			? path.normalize(directoryPrefix)
			: path.join(this.workspacePath, directoryPrefix)
		return absolute.replace(/\\/g, "/").replace(/\/+$/, "")
	}

	/**
	 * Extracts a printable message from an arbitrary thrown value: the value's
	 * `message` when it is a non-empty string, otherwise its string form.
	 */
	private static _describeError(error: unknown): string {
		const message = (error as { message?: unknown } | null | undefined)?.message
		return typeof message === "string" && message ? message : String(error)
	}

	/**
	 * Converts semble CLI results to the VectorStoreSearchResult format.
	 *
	 * Result paths are joined against `basePath` (the path semble was invoked
	 * with) to produce absolute paths; a path that is already absolute is kept
	 * as is rather than being joined a second time. Results without a file
	 * path are dropped. Ids are positional within the returned list.
	 */
	private _convertResults(results: SembleSearchResult[], basePath: string): VectorStoreSearchResult[] {
		return results
			.filter((r) => r.chunk?.file_path) // Exclude results with no file path
			.map((r, index) => {
				const reported = r.chunk.file_path
				const absolute = path.isAbsolute(reported) ? path.normalize(reported) : path.join(basePath, reported)
				return {
					id: `semble-${index}`,
					score: r.score,
					payload: {
						filePath: absolute.replace(/\\/g, "/"),
						codeChunk: r.chunk?.content ?? "",
						startLine: r.chunk?.start_line ?? 0,
						endLine: r.chunk?.end_line ?? 0,
					},
				}
			})
	}
}
/**
 * Wraps the `semble` CLI for programmatic access.
 *
 * Every call spawns the binary via `child_process.spawn` with an argument
 * array and `shell: false`, so no shell ever interprets the arguments.
 *
 * Subcommands used here:
 *   search <query> [path]                 — search a codebase
 *   find-related <file> <line> [path]     — find similar code
 *
 * Flags used here:
 *   -k N             — number of results
 *   --content TYPE   — content type: code, docs, config, all
 *
 * NOTE(review): a query starting with "-" is presumably parsed by semble as
 * an option rather than a positional argument — confirm how the CLI treats
 * such input before exposing queries from untrusted sources.
 */
export class SembleCLI {
	private readonly semblePath: string

	/** @param semblePath Path of the semble executable to spawn. */
	constructor(semblePath: string) {
		this.semblePath = semblePath
	}

	/**
	 * Checks whether the semble binary is functional by running `semble --help`.
	 * Never throws; a failure is reported through the returned `error` text.
	 */
	async checkInstalled(): Promise<SembleCheckResult> {
		try {
			await this._spawn(["--help"], { timeout: 10_000 })
			return { installed: true }
		} catch (error: unknown) {
			const { stderr, message } = SembleCLI._failureDetails(error)
			return { installed: false, error: stderr || message || "Failed to run semble" }
		}
	}

	/**
	 * Searches a codebase. Semble indexes on-the-fly during search.
	 *
	 * @throws Error prefixed with "Semble search failed:" when the process fails.
	 */
	async search(
		query: string,
		repoPath: string,
		options?: { topK?: number; content?: SembleContentType },
	): Promise<SembleSearchResult[]> {
		return this._runQuery("search", ["search", query, repoPath], options)
	}

	/**
	 * Finds code similar to the code at `filePath`:`line`.
	 *
	 * @throws Error prefixed with "Semble find-related failed:" when the process fails.
	 */
	async findRelated(
		filePath: string,
		line: number,
		repoPath: string,
		options?: { topK?: number; content?: SembleContentType },
	): Promise<SembleSearchResult[]> {
		return this._runQuery("find-related", ["find-related", filePath, String(line), repoPath], options)
	}

	/**
	 * Shared implementation of the query subcommands: appends the common
	 * flags, runs semble with a two-minute timeout and parses its output.
	 */
	private async _runQuery(
		label: string,
		positional: string[],
		options?: { topK?: number; content?: SembleContentType },
	): Promise<SembleSearchResult[]> {
		const topK = options?.topK ?? SEMBLE_DEFAULTS.DEFAULT_TOP_K
		const args = [...positional, "-k", String(topK)]
		// "code" is not passed explicitly; only other content types add the flag.
		if (options?.content && options.content !== "code") {
			args.push("--content", options.content)
		}

		try {
			const { stdout } = await this._spawn(args, { timeout: 120_000 })
			return this._parseOutput(stdout)
		} catch (error: unknown) {
			const { stderr, message } = SembleCLI._failureDetails(error)
			throw new Error(`Semble ${label} failed: ${stderr || message || String(error)}`)
		}
	}

	/**
	 * Pulls the `message` and trimmed `stderr` strings off a thrown value,
	 * yielding empty strings for anything that is missing.
	 */
	private static _failureDetails(error: unknown): { message: string; stderr: string } {
		const fields = (typeof error === "object" && error !== null ? error : {}) as {
			message?: unknown
			stderr?: unknown
		}
		return {
			message: typeof fields.message === "string" ? fields.message : "",
			stderr: typeof fields.stderr === "string" ? fields.stderr.trim() : "",
		}
	}

	/**
	 * Spawns the semble process and collects stdout/stderr.
	 *
	 * Output is gathered as raw chunks and decoded once at the end, because a
	 * multi-byte UTF-8 character may be split across two chunks.
	 *
	 * @returns Decoded stdout and stderr when the process exits with code 0.
	 * @throws Error carrying `stderr` and `stdout` properties when the process
	 *   cannot be started, exits non-zero, or is terminated by a signal.
	 */
	private _spawn(args: string[], options: { timeout: number }): Promise<{ stdout: string; stderr: string }> {
		return new Promise((resolve, reject) => {
			const child = spawn(this.semblePath, args, {
				shell: false,
				timeout: options.timeout,
				stdio: ["ignore", "pipe", "pipe"],
			})

			const stdoutChunks: Buffer[] = []
			const stderrChunks: Buffer[] = []
			const toBuffer = (chunk: Buffer | string): Buffer =>
				typeof chunk === "string" ? Buffer.from(chunk) : chunk
			const decode = (chunks: Buffer[]): string => Buffer.concat(chunks).toString("utf8")

			child.stdout?.on("data", (chunk: Buffer | string) => {
				stdoutChunks.push(toBuffer(chunk))
			})

			child.stderr?.on("data", (chunk: Buffer | string) => {
				stderrChunks.push(toBuffer(chunk))
			})

			child.on("error", (err: Error) => {
				reject(Object.assign(err, { stderr: decode(stderrChunks), stdout: decode(stdoutChunks) }))
			})

			child.on("close", (code: number | null, signal: string | null) => {
				const stdout = decode(stdoutChunks)
				const stderr = decode(stderrChunks)
				if (code === 0) {
					resolve({ stdout, stderr })
					return
				}
				// The spawn `timeout` option kills the child with a signal; in that case `code` is null.
				const reason = signal
					? `Process was terminated by ${signal} (timeout is ${options.timeout}ms)`
					: `Process exited with code ${code}`
				reject(Object.assign(new Error(reason), { stderr, stdout }))
			})
		})
	}

	/**
	 * Parses semble CLI JSON output into structured results.
	 *
	 * Accepted shapes:
	 *   { "query": "...", "results": [{ "chunk": {...}, "score": X }] }
	 *   { "error": "No results found." }   — yields no results
	 *   [ { "chunk": {...}, "score": X } ] — flat array (older format)
	 *
	 * Anything else, including empty or non-JSON output, yields `[]`.
	 */
	private _parseOutput(stdout: string): SembleSearchResult[] {
		const trimmed = stdout.trim()
		if (!trimmed) {
			return []
		}

		let parsed: unknown
		try {
			parsed = JSON.parse(trimmed)
		} catch {
			// Not JSON — this shouldn't happen with v0.3.0+ but handle gracefully
			console.warn("[SembleCLI] Unexpected non-JSON output from semble")
			return []
		}

		if (Array.isArray(parsed)) {
			return parsed as SembleSearchResult[]
		}

		if (typeof parsed !== "object" || parsed === null) {
			return []
		}

		const envelope = parsed as { error?: unknown; results?: unknown }
		if (envelope.error) {
			// "No results found." is the expected empty answer; surface anything else in the log.
			if (typeof envelope.error !== "string" || !/no results/i.test(envelope.error)) {
				console.warn("[SembleCLI] semble reported an error:", envelope.error)
			}
			return []
		}

		return Array.isArray(envelope.results) ? (envelope.results as SembleSearchResult[]) : []
	}
}
/**
 * Supported platform/arch combinations for the semble standalone executable,
 * keyed as `${process.platform}-${process.arch}`. Each entry names the release
 * archive and the executable inside it.
 */
const SEMBLE_ARCHIVES: Record<string, { archive: string; binary: string }> = {
	"linux-x64": { archive: "semble-linux-x64-fast.tar.gz", binary: "semble" },
	"linux-arm64": { archive: "semble-linux-arm64-fast.tar.gz", binary: "semble" },
	"darwin-arm64": { archive: "semble-macos-arm64-fast.tar.gz", binary: "semble" },
	"win32-x64": { archive: "semble-windows-x64-fast.zip", binary: "semble.exe" },
}

/** Release tag that is downloaded and recorded in the version marker file. */
const SEMBLE_VERSION = "v0.3.1"
const DOWNLOAD_BASE_URL = `https://github.com/Zoo-Code-Org/sembleexec/releases/download/${SEMBLE_VERSION}`
/** Name of the marker file, inside the install directory, that stores the installed version. */
const VERSION_FILE = ".semble-version"

/**
 * SHA-256 checksums for each platform archive at SEMBLE_VERSION, keyed like
 * SEMBLE_ARCHIVES. They are verified after download to guard against
 * tampered release assets. Update them whenever SEMBLE_VERSION is bumped.
 *
 * To regenerate: `shasum -a 256 <archive>`
 */
const SEMBLE_SHA256: Record<string, string> = {
	"linux-x64": "2bd4117dbd1ff7a26ed5ef44dad8d43162a4b9f431ec0bcc9dd2f9c6f5952e28",
	"linux-arm64": "177d14f41d3272594844a2635d59d97ad20400868a874a59169fd26a868c32a5",
	"darwin-arm64": "9130f447ff2c21803853a9aee58268f0e05134326384ac23d8b74ed22905e118",
	"win32-x64": "c8ae86f3703675e356824e08cf79c8a20c41c602296d2a5bff15bf35d762a46b",
}

/**
 * Verifies the SHA-256 checksum of a file against the expected hex digest.
 *
 * The expected digest is compared case-insensitively and ignoring surrounding
 * whitespace, so digests copied from tools that print uppercase hex are
 * accepted.
 *
 * @param filePath File to hash.
 * @param expected Expected SHA-256 digest as a hex string.
 * @throws Error when the file cannot be read or the digest does not match.
 */
export async function verifyChecksum(filePath: string, expected: string): Promise<void> {
	const hash = createHash("sha256")
	await new Promise<void>((resolve, reject) => {
		const stream = createReadStream(filePath)
		stream.on("data", (chunk) => hash.update(chunk))
		stream.on("end", () => resolve())
		stream.on("error", reject)
	})

	const actual = hash.digest("hex")
	const wanted = expected.trim().toLowerCase()
	if (actual.toLowerCase() !== wanted) {
		throw new Error(
			`Checksum mismatch for ${path.basename(filePath)}: expected ${wanted.slice(0, 12)}…, got ${actual.slice(0, 12)}…`,
		)
	}
}
/**
 * Tells whether a prebuilt semble archive exists for a platform/arch pair.
 * Both arguments default to the values of the running process.
 */
export function isSembleSupportedPlatform(platform?: string, arch?: string): boolean {
	const key = `${platform ?? process.platform}-${arch ?? process.arch}`
	return key in SEMBLE_ARCHIVES
}

/**
 * Lists every supported platform-arch key (e.g. "linux-x64", "darwin-arm64").
 */
export function getSembleSupportedPlatforms(): string[] {
	return Object.keys(SEMBLE_ARCHIVES)
}

/**
 * Looks up the archive entry for a platform/arch pair (defaulting to the
 * running process). Yields undefined when the pair is unsupported.
 */
function getArchiveInfo(platform?: string, arch?: string): { archive: string; binary: string } | undefined {
	const key = `${platform ?? process.platform}-${arch ?? process.arch}`
	return SEMBLE_ARCHIVES[key]
}

/**
 * Reads the version recorded in the marker file of the install directory.
 * Yields undefined when the file is missing, unreadable or blank.
 */
async function getInstalledVersion(storageDir: string): Promise<string | undefined> {
	const markerPath = path.join(storageDir, "semble", VERSION_FILE)
	try {
		const recorded = (await fs.readFile(markerPath, "utf-8")).trim()
		if (recorded) {
			return recorded
		}
		return undefined
	} catch {
		return undefined
	}
}

/**
 * Records `version` in the marker file of the install directory.
 */
async function writeInstalledVersion(storageDir: string, version: string): Promise<void> {
	const markerPath = path.join(storageDir, "semble", VERSION_FILE)
	await fs.writeFile(markerPath, version, "utf-8")
}
/**
 * Downloads and extracts the semble archive for the current platform.
 *
 * The pinned SEMBLE_VERSION is compared against the version recorded on disk.
 * When both match and the binary exists, the existing path is returned.
 * When a different version is recorded, the old installation is removed
 * first. The archive is verified against its pinned SHA-256 before it is
 * extracted into `storageDir/semble/`.
 *
 * @param storageDir Directory that hosts the installation (e.g. globalStorageUri.fsPath).
 * @returns Full path of the semble executable, or undefined when the platform is unsupported.
 * @throws Error prefixed with "Failed to download semble:" when download,
 *   verification or extraction fails; partial artifacts are removed first.
 */
export async function downloadSemble(storageDir: string): Promise<string | undefined> {
	const info = getArchiveInfo()
	if (!info) {
		return undefined
	}

	// Ensure storage directory exists
	await fs.mkdir(storageDir, { recursive: true })

	const extractDir = path.join(storageDir, "semble")
	const binaryPath = path.join(extractDir, info.binary)

	// Check if already downloaded at the correct version
	const installedVersion = await getInstalledVersion(storageDir)

	if (installedVersion === SEMBLE_VERSION) {
		try {
			await fs.access(binaryPath)
			// Binary exists and version matches — nothing to do
			if (process.platform !== "win32") {
				await fs.chmod(binaryPath, 0o755)
			}
			return binaryPath
		} catch {
			// Binary missing despite version file — re-download below
		}
	}

	// Version mismatch — remove old installation before downloading new one
	if (installedVersion && installedVersion !== SEMBLE_VERSION) {
		console.log(`[SembleDownloader] Version changed from ${installedVersion} to ${SEMBLE_VERSION}, updating...`)
		try {
			await fs.rm(extractDir, { recursive: true, force: true })
		} catch {
			// ignore cleanup errors
		}
	}

	const url = `${DOWNLOAD_BASE_URL}/${info.archive}`
	const archivePath = path.join(storageDir, info.archive)
	console.log(`[SembleDownloader] Downloading semble ${SEMBLE_VERSION} from ${url}`)

	try {
		// Fail closed: an archive without a pinned checksum is never executed.
		const platformKey = `${process.platform}-${process.arch}`
		const expectedChecksum = SEMBLE_SHA256[platformKey]
		if (!expectedChecksum) {
			throw new Error(`No pinned checksum for ${platformKey}; refusing to install an unverified archive`)
		}

		await downloadFile(url, archivePath)

		// Verify archive integrity before extraction
		await verifyChecksum(archivePath, expectedChecksum)

		// Extract the archive
		await fs.mkdir(extractDir, { recursive: true })

		if (info.archive.endsWith(".tar.gz")) {
			await extractTarGz(archivePath, extractDir)
		} else if (info.archive.endsWith(".zip")) {
			await extractZip(archivePath, extractDir)
		} else {
			throw new Error(`Unsupported archive format: ${info.archive}`)
		}

		// Make binary executable on unix platforms
		if (process.platform !== "win32") {
			await fs.chmod(binaryPath, 0o755)
		}

		// Record the installed version
		await writeInstalledVersion(storageDir, SEMBLE_VERSION)

		// Clean up the archive file
		try {
			await fs.unlink(archivePath)
		} catch {
			// ignore cleanup errors
		}

		console.log(`[SembleDownloader] Successfully installed semble ${SEMBLE_VERSION} to ${binaryPath}`)
		return binaryPath
	} catch (error: unknown) {
		// Clean up partial download/extraction
		try {
			await fs.unlink(archivePath)
		} catch {
			// ignore cleanup errors
		}
		try {
			await fs.rm(extractDir, { recursive: true, force: true })
		} catch {
			// ignore cleanup errors
		}
		const reason = error instanceof Error && error.message ? error.message : String(error)
		console.error(`[SembleDownloader] Failed to download semble: ${reason}`)
		throw new Error(`Failed to download semble: ${reason}`)
	}
}

/**
 * Returns the path of the semble binary when it already exists on disk.
 * Yields undefined when the platform is unsupported or the binary is absent.
 * The recorded version is not checked here.
 */
export async function getSembleBinaryPath(storageDir: string): Promise<string | undefined> {
	const info = getArchiveInfo()
	if (!info) {
		return undefined
	}

	const binaryPath = path.join(storageDir, "semble", info.binary)

	try {
		await fs.access(binaryPath)
		return binaryPath
	} catch {
		return undefined
	}
}
/**
 * Runs an extraction command without a shell and settles once it exits.
 *
 * @param command Executable to spawn.
 * @param args Arguments passed as an array (never interpreted by a shell).
 * @param label Archive kind used in the failure message ("tar" or "zip").
 * @throws Error when the command cannot be started or exits non-zero; the
 *   message carries the exit code and the trimmed stderr output.
 */
function runExtractor(command: string, args: string[], label: string): Promise<void> {
	return new Promise((resolve, reject) => {
		const child = spawn(command, args, {
			shell: false,
			stdio: ["ignore", "pipe", "pipe"],
		})

		let stderr = ""
		child.stderr?.on("data", (data: Buffer) => {
			stderr += data.toString()
		})

		child.on("error", (err) => reject(err))
		child.on("close", (code) => {
			if (code === 0) {
				resolve()
			} else {
				reject(new Error(`${label} extraction failed (code ${code}): ${stderr.trim()}`))
			}
		})
	})
}

/**
 * Extracts a .tar.gz archive into the destination directory using the system `tar` command.
 */
function extractTarGz(archivePath: string, destDir: string): Promise<void> {
	return runExtractor("tar", ["-xzf", archivePath, "-C", destDir], "tar")
}

/**
 * Escapes a string for use inside a PowerShell single-quoted literal.
 * In PowerShell, the only special character in a single-quoted string is the
 * apostrophe itself, which is escaped by doubling it.
 */
function escapePowerShellLiteral(value: string): string {
	return value.replace(/'/g, "''")
}

/**
 * Builds the PowerShell command that unpacks `archivePath` into `destDir`.
 *
 * `-LiteralPath` is used instead of `-Path` because `-Path` treats `[` and
 * `]` as wildcard characters, which breaks storage directories whose names
 * contain brackets.
 */
function buildExpandArchiveCommand(archivePath: string, destDir: string): string {
	const source = escapePowerShellLiteral(archivePath)
	const target = escapePowerShellLiteral(destDir)
	return `Expand-Archive -LiteralPath '${source}' -DestinationPath '${target}' -Force`
}

/**
 * Extracts a .zip archive into the destination directory.
 * Uses PowerShell on Windows, unzip on other platforms.
 */
function extractZip(archivePath: string, destDir: string): Promise<void> {
	if (process.platform === "win32") {
		return runExtractor(
			"powershell",
			["-NoProfile", "-Command", buildExpandArchiveCommand(archivePath, destDir)],
			"zip",
		)
	}
	return runExtractor("unzip", ["-o", archivePath, "-d", destDir], "zip")
}
/**
 * Downloads a file from the given HTTPS URL to the destination path.
 *
 * Follows up to `maxRedirects` redirects (GitHub releases redirect to a CDN).
 * Relative `Location` headers are resolved against the current URL, and every
 * hop must use HTTPS.
 *
 * @param url Absolute https:// URL to fetch.
 * @param destPath File that receives the response body.
 * @param maxRedirects Remaining redirect budget; rejects once it reaches 0.
 * @throws Error on an invalid or non-HTTPS URL, too many redirects, a
 *   non-200 response, a response or write failure, or a 2-minute idle timeout.
 */
function downloadFile(url: string, destPath: string, maxRedirects = 5): Promise<void> {
	return new Promise((resolve, reject) => {
		if (maxRedirects <= 0) {
			reject(new Error("Too many redirects"))
			return
		}

		let target: URL
		try {
			target = new URL(url)
		} catch {
			reject(new Error(`Invalid download URL: ${url}`))
			return
		}
		if (target.protocol !== "https:") {
			reject(new Error(`Refusing to download over non-HTTPS URL: ${url}`))
			return
		}

		const request = https.get(url, (response) => {
			const status = response.statusCode
			const location = response.headers.location

			// Follow redirects
			if (status && status >= 300 && status < 400 && location) {
				response.destroy()
				// Keep absolute targets untouched; resolve relative ones against the current URL.
				const next = /^[a-z][a-z0-9+.-]*:/i.test(location) ? location : new URL(location, url).toString()
				downloadFile(next, destPath, maxRedirects - 1)
					.then(resolve)
					.catch(reject)
				return
			}

			if (status !== 200) {
				response.destroy()
				reject(new Error(`HTTP ${status}: Failed to download ${url}`))
				return
			}

			const file = createWriteStream(destPath)
			response.pipe(file)

			// pipe() does not forward source errors; without this listener a
			// connection dropped mid-body would leave the promise pending forever.
			response.on("error", (err) => {
				file.close()
				reject(err)
			})

			file.on("finish", () => {
				file.close()
				resolve()
			})

			file.on("error", (err) => {
				file.close()
				reject(err)
			})
		})

		request.on("error", reject)
		request.on("timeout", () => {
			request.destroy()
			reject(new Error("Download timed out"))
		})

		// 2 minute timeout for download
		request.setTimeout(120_000)
	})
}
/**
 * A single chunk returned by semble search results.
 * Matches the `chunk` field in semble's JSON output format.
 */
export interface SembleChunk {
	content: string
	/** Path of the file containing the chunk, relative to the searched path. */
	file_path: string
	start_line: number
	end_line: number
	language: string | null
	location: string
}

/**
 * Result from a semble CLI search invocation.
 * Matches the JSON output format: `{ query, results: [{ chunk, score }] }`.
 */
export interface SembleSearchResult {
	chunk: SembleChunk
	score: number
}

/**
 * Result from checking if semble is functional.
 */
export interface SembleCheckResult {
	installed: boolean
	/** Failure description; only set when `installed` is false. */
	error?: string
}

/**
 * Configuration for the Semble provider.
 */
export interface SembleConfig {
	/** Maximum search results to return. Default: 10. */
	topK: number
	/** Content types to search. Default: "code". */
	content: SembleContentType
}

/**
 * Interface for the SembleProvider that wraps the semble CLI.
 *
 * Note: `findRelated` is available on SembleCLI but not yet exposed through
 * this provider or CodeIndexManager. It's reserved for future use — e.g., a
 * "find similar code" tool or context menu action.
 */
export interface ISembleProvider {
	/** Initializes the provider — obtains the semble binary and verifies that it runs. */
	initialize(): Promise<void>

	/** Marks the provider as ready (semble indexes on-the-fly). */
	startIndexing(): Promise<void>

	/** Stops indexing (no-op — semble has no background process). */
	stopIndexing(): void

	/** Searches the codebase, optionally restricted to files under `directoryPrefix`. */
	searchIndex(query: string, directoryPrefix?: string): Promise<VectorStoreSearchResult[]>

	/** Resets the provider state; semble's own on-disk cache is left untouched. */
	clearIndexData(): Promise<void>

	/** Disposes resources. */
	dispose(): void

	/** Current state. */
	readonly state: IndexingState
}
+ */ +export const SEMBLE_DEFAULTS = { + DEFAULT_TOP_K: 10, + DEFAULT_CONTENT: "code" as SembleContentType, +} diff --git a/src/services/code-index/service-factory.ts b/src/services/code-index/service-factory.ts index d23eff4810..335812bd41 100644 --- a/src/services/code-index/service-factory.ts +++ b/src/services/code-index/service-factory.ts @@ -45,6 +45,12 @@ export class CodeIndexServiceFactory { const provider = config.embedderProvider as EmbedderProvider + if (provider === "semble") { + throw new Error( + "Semble provider handles its own embedding. Do not call createEmbedder() for semble — use SembleProvider instead.", + ) + } + if (provider === "openai") { const apiKey = config.openAiOptions?.openAiNativeApiKey @@ -141,6 +147,13 @@ export class CodeIndexServiceFactory { const config = this.configManager.getConfig() const provider = config.embedderProvider as EmbedderProvider + + if (provider === "semble") { + throw new Error( + "Semble provider handles its own vector storage. Do not call createVectorStore() for semble — use SembleProvider instead.", + ) + } + const defaultModel = getDefaultModelId(provider) // Use the embedding model ID from config, not the chat model IDs const modelId = config.modelId ?? 
defaultModel diff --git a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts index 7f5c9fac2b..a89e2c9488 100644 --- a/src/shared/embeddingModels.ts +++ b/src/shared/embeddingModels.ts @@ -87,6 +87,9 @@ export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = { "qwen/qwen3-embedding-4b": { dimension: 2560, scoreThreshold: 0.4 }, "qwen/qwen3-embedding-8b": { dimension: 4096, scoreThreshold: 0.4 }, }, + semble: { + "potion-code-16M": { dimension: 256, scoreThreshold: 0.4 }, + }, } /** @@ -185,6 +188,9 @@ export function getDefaultModelId(provider: EmbedderProvider): string { case "openrouter": return "openai/text-embedding-3-large" + case "semble": + return "potion-code-16M" + default: // Fallback for unknown providers console.warn(`Unknown provider for default model ID: ${provider}. Falling back to OpenAI default.`) diff --git a/webview-ui/src/components/chat/CodeIndexPopover.tsx b/webview-ui/src/components/chat/CodeIndexPopover.tsx index 763c243ec1..d3e1af2b08 100644 --- a/webview-ui/src/components/chat/CodeIndexPopover.tsx +++ b/webview-ui/src/components/chat/CodeIndexPopover.tsx @@ -176,6 +176,12 @@ const createValidationSchema = (provider: EmbedderProvider, t: any) => { .min(1, t("settings:codeIndex.validation.modelSelectionRequired")), }) + case "semble": + // Semble requires no API keys, Qdrant URL, or model selection + return z.object({ + codebaseIndexEnabled: z.boolean(), + }) + default: return baseSchema } @@ -187,7 +193,7 @@ export const CodeIndexPopover: React.FC = ({ }) => { const SECRET_PLACEHOLDER = "••••••••••••••••" const { t } = useAppTranslation() - const { codebaseIndexConfig, codebaseIndexModels, cwd, apiConfiguration } = useExtensionState() + const { codebaseIndexConfig, codebaseIndexModels, cwd, apiConfiguration, platform, arch } = useExtensionState() const [open, setOpen] = useState(false) const [isAdvancedSettingsOpen, setIsAdvancedSettingsOpen] = useState(false) const [isSetupSettingsOpen, setIsSetupSettingsOpen] = 
useState(false) @@ -197,6 +203,10 @@ export const CodeIndexPopover: React.FC = ({ const [saveStatus, setSaveStatus] = useState<"idle" | "saving" | "saved" | "error">("idle") const [saveError, setSaveError] = useState(null) + // Check if semble is supported on the current platform + const SEMBLE_SUPPORTED_PLATFORMS = ["linux-x64", "linux-arm64", "darwin-arm64", "win32-x64"] + const isSembleSupported = platform && arch ? SEMBLE_SUPPORTED_PLATFORMS.includes(`${platform}-${arch}`) : false + // Form validation state const [formErrors, setFormErrors] = useState>({}) @@ -761,6 +771,11 @@ export const CodeIndexPopover: React.FC = ({ {t("settings:codeIndex.openRouterProvider")} + {isSembleSupported && ( + + {t("settings:codeIndex.sembleProvider")} + + )} @@ -1430,54 +1445,60 @@ export const CodeIndexPopover: React.FC = ({ )} - {/* Qdrant Settings */} -
- - - updateSetting("codebaseIndexQdrantUrl", e.target.value) - } - onBlur={(e: any) => { - // Set default Qdrant URL if field is empty - if (!e.target.value.trim()) { - currentSettings.codebaseIndexQdrantUrl = DEFAULT_QDRANT_URL - updateSetting("codebaseIndexQdrantUrl", DEFAULT_QDRANT_URL) - } - }} - placeholder={t("settings:codeIndex.qdrantUrlPlaceholder")} - className={cn("w-full", { - "border-red-500": formErrors.codebaseIndexQdrantUrl, - })} - /> - {formErrors.codebaseIndexQdrantUrl && ( -

- {formErrors.codebaseIndexQdrantUrl} -

- )} -
+ {/* Qdrant Settings — hidden for semble */} + {currentSettings.codebaseIndexEmbedderProvider !== "semble" && ( + <> +
+ + + updateSetting("codebaseIndexQdrantUrl", e.target.value) + } + onBlur={(e: any) => { + // Set default Qdrant URL if field is empty + if (!e.target.value.trim()) { + currentSettings.codebaseIndexQdrantUrl = DEFAULT_QDRANT_URL + updateSetting("codebaseIndexQdrantUrl", DEFAULT_QDRANT_URL) + } + }} + placeholder={t("settings:codeIndex.qdrantUrlPlaceholder")} + className={cn("w-full", { + "border-red-500": formErrors.codebaseIndexQdrantUrl, + })} + /> + {formErrors.codebaseIndexQdrantUrl && ( +

+ {formErrors.codebaseIndexQdrantUrl} +

+ )} +
-
- - updateSetting("codeIndexQdrantApiKey", e.target.value)} - placeholder={t("settings:codeIndex.qdrantApiKeyPlaceholder")} - className={cn("w-full", { - "border-red-500": formErrors.codeIndexQdrantApiKey, - })} - /> - {formErrors.codeIndexQdrantApiKey && ( -

- {formErrors.codeIndexQdrantApiKey} -

- )} -
+
+ + + updateSetting("codeIndexQdrantApiKey", e.target.value) + } + placeholder={t("settings:codeIndex.qdrantApiKeyPlaceholder")} + className={cn("w-full", { + "border-red-500": formErrors.codeIndexQdrantApiKey, + })} + /> + {formErrors.codeIndexQdrantApiKey && ( +

+ {formErrors.codeIndexQdrantApiKey} +

+ )} +
+ + )} )} diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index f7e3bb69e2..367f6e7336 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Aturant...", "workspaceToggleLabel": "Activar la indexació per a aquest espai de treball", "workspaceDisabledMessage": "La indexació està configurada però no habilitada per a aquest espai de treball.", - "autoEnableDefaultLabel": "Habilitar automàticament la indexació per a nous espais de treball" + "autoEnableDefaultLabel": "Habilitar automàticament la indexació per a nous espais de treball", + "sembleProvider": "Semble - Local" }, "autoApprove": { "toggleShortcut": "Pots configurar una drecera global per a aquesta configuració a les preferències del teu IDE.", diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index 1aa9f96053..c55c72734d 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Wird gestoppt...", "workspaceToggleLabel": "Indexierung für diesen Arbeitsbereich aktivieren", "workspaceDisabledMessage": "Indexierung ist konfiguriert, aber nicht für diesen Arbeitsbereich aktiviert.", - "autoEnableDefaultLabel": "Indexierung für neue Arbeitsbereiche automatisch aktivieren" + "autoEnableDefaultLabel": "Indexierung für neue Arbeitsbereiche automatisch aktivieren", + "sembleProvider": "Semble - Lokal" }, "autoApprove": { "toggleShortcut": "Du kannst in deinen IDE-Einstellungen einen globalen Shortcut für diese Einstellung konfigurieren.", diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index 7f3527df6b..547d3a4443 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -203,6 +203,7 @@ "openRouterApiKeyPlaceholder": 
"Enter your OpenRouter API key", "openRouterProviderRoutingLabel": "OpenRouter Provider Routing", "openRouterProviderRoutingDescription": "OpenRouter routes requests to the best available providers for your embedding model. By default, requests are load balanced across the top providers to maximize uptime. However, you can choose a specific provider to use for this model.", + "sembleProvider": "Semble - Local", "openaiCompatibleProvider": "OpenAI Compatible", "openAiKeyLabel": "OpenAI API Key", "openAiKeyPlaceholder": "Enter your OpenAI API key", diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index 154319baef..90206c8134 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Deteniendo...", "workspaceToggleLabel": "Activar indexación para este espacio de trabajo", "workspaceDisabledMessage": "La indexación está configurada pero no habilitada para este espacio de trabajo.", - "autoEnableDefaultLabel": "Habilitar automáticamente la indexación para nuevos espacios de trabajo" + "autoEnableDefaultLabel": "Habilitar automáticamente la indexación para nuevos espacios de trabajo", + "sembleProvider": "Semble - Local" }, "autoApprove": { "toggleShortcut": "Puedes configurar un atajo global para esta configuración en las preferencias de tu IDE.", diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 30cdd37785..609b72b435 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Arrêt en cours...", "workspaceToggleLabel": "Activer l'indexation pour cet espace de travail", "workspaceDisabledMessage": "L'indexation est configurée mais non activée pour cet espace de travail.", - "autoEnableDefaultLabel": "Activer automatiquement l'indexation pour les nouveaux espaces de travail" + 
"autoEnableDefaultLabel": "Activer automatiquement l'indexation pour les nouveaux espaces de travail", + "sembleProvider": "Semble - Local" }, "autoApprove": { "toggleShortcut": "Vous pouvez configurer un raccourci global pour ce paramètre dans les préférences de votre IDE.", diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index a334b8cb5f..18cecc9e65 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "रोक रहा है...", "workspaceToggleLabel": "इस वर्कस्पेस के लिए इंडेक्सिंग सक्षम करें", "workspaceDisabledMessage": "इंडेक्सिंग कॉन्फ़िगर की गई है लेकिन इस वर्कस्पेस के लिए सक्षम नहीं है।", - "autoEnableDefaultLabel": "नए वर्कस्पेस के लिए स्वचालित रूप से इंडेक्सिंग सक्षम करें" + "autoEnableDefaultLabel": "नए वर्कस्पेस के लिए स्वचालित रूप से इंडेक्सिंग सक्षम करें", + "sembleProvider": "Semble - स्थानीय" }, "autoApprove": { "toggleShortcut": "आप अपनी आईडीई वरीयताओं में इस सेटिंग के लिए एक वैश्विक शॉर्टकट कॉन्फ़िगर कर सकते हैं।", diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json index 23e974429e..c348b19d32 100644 --- a/webview-ui/src/i18n/locales/id/settings.json +++ b/webview-ui/src/i18n/locales/id/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Menghentikan...", "workspaceToggleLabel": "Aktifkan pengindeksan untuk ruang kerja ini", "workspaceDisabledMessage": "Pengindeksan dikonfigurasi tetapi tidak diaktifkan untuk ruang kerja ini.", - "autoEnableDefaultLabel": "Aktifkan pengindeksan secara otomatis untuk ruang kerja baru" + "autoEnableDefaultLabel": "Aktifkan pengindeksan secara otomatis untuk ruang kerja baru", + "sembleProvider": "Semble - Lokal" }, "autoApprove": { "toggleShortcut": "Anda dapat mengonfigurasi pintasan global untuk pengaturan ini di preferensi IDE Anda.", diff --git a/webview-ui/src/i18n/locales/it/settings.json 
b/webview-ui/src/i18n/locales/it/settings.json index 3015d9338d..a75a8ac106 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Interruzione...", "workspaceToggleLabel": "Abilita l'indicizzazione per questo workspace", "workspaceDisabledMessage": "L'indicizzazione è configurata ma non abilitata per questo workspace.", - "autoEnableDefaultLabel": "Abilita automaticamente l'indicizzazione per i nuovi workspace" + "autoEnableDefaultLabel": "Abilita automaticamente l'indicizzazione per i nuovi workspace", + "sembleProvider": "Semble - Locale" }, "autoApprove": { "toggleShortcut": "Puoi configurare una scorciatoia globale per questa impostazione nelle preferenze del tuo IDE.", diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index 02091510e5..6554f7f04b 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "停止中...", "workspaceToggleLabel": "このワークスペースのインデックス作成を有効にする", "workspaceDisabledMessage": "インデックス作成は設定済みですが、このワークスペースでは有効になっていません。", - "autoEnableDefaultLabel": "新しいワークスペースのインデックス作成を自動的に有効にする" + "autoEnableDefaultLabel": "新しいワークスペースのインデックス作成を自動的に有効にする", + "sembleProvider": "Semble - ローカル" }, "autoApprove": { "toggleShortcut": "IDEの環境設定で、この設定のグローバルショートカットを設定できます。", diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index 4a2155aba4..ca99106fb6 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "중지 중...", "workspaceToggleLabel": "이 워크스페이스에 대한 인덱싱 활성화", "workspaceDisabledMessage": "인덱싱이 구성되었지만 이 워크스페이스에서는 활성화되지 않았습니다.", - "autoEnableDefaultLabel": "새 워크스페이스에 대한 인덱싱 자동 활성화" + "autoEnableDefaultLabel": "새 워크스페이스에 대한 인덱싱 자동 활성화", + "sembleProvider": "Semble - 로컬" }, "autoApprove": { 
"toggleShortcut": "IDE 환경 설정에서 이 설정에 대한 전역 바로 가기를 구성할 수 있습니다.", diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index 006fd8721f..b682b6590d 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Stoppen...", "workspaceToggleLabel": "Indexering inschakelen voor deze werkruimte", "workspaceDisabledMessage": "Indexering is geconfigureerd maar niet ingeschakeld voor deze werkruimte.", - "autoEnableDefaultLabel": "Indexering automatisch inschakelen voor nieuwe werkruimtes" + "autoEnableDefaultLabel": "Indexering automatisch inschakelen voor nieuwe werkruimtes", + "sembleProvider": "Semble - Lokaal" }, "autoApprove": { "toggleShortcut": "U kunt een globale sneltoets voor deze instelling configureren in de voorkeuren van uw IDE.", diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index c4492da87c..53cccc25aa 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Zatrzymywanie...", "workspaceToggleLabel": "Włącz indeksowanie dla tego workspace'a", "workspaceDisabledMessage": "Indeksowanie jest skonfigurowane, ale nie włączone dla tego workspace'a.", - "autoEnableDefaultLabel": "Automatycznie włączaj indeksowanie dla nowych workspace'ów" + "autoEnableDefaultLabel": "Automatycznie włączaj indeksowanie dla nowych workspace'ów", + "sembleProvider": "Semble - Lokalny" }, "autoApprove": { "toggleShortcut": "Możesz skonfigurować globalny skrót dla tego ustawienia w preferencjach swojego IDE.", diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index 6e19325c5c..43bb210b99 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -213,7 +213,8 @@ "stoppingButton": 
"Parando...", "workspaceToggleLabel": "Ativar indexação para este workspace", "workspaceDisabledMessage": "A indexação está configurada, mas não ativada para este workspace.", - "autoEnableDefaultLabel": "Ativar indexação automaticamente para novos workspaces" + "autoEnableDefaultLabel": "Ativar indexação automaticamente para novos workspaces", + "sembleProvider": "Semble - Local" }, "autoApprove": { "toggleShortcut": "Você pode configurar um atalho global para esta configuração nas preferências do seu IDE.", diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 2641939b9b..eb2c1f29cb 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Остановка...", "workspaceToggleLabel": "Включить индексацию для этого рабочего пространства", "workspaceDisabledMessage": "Индексация настроена, но не включена для этого рабочего пространства.", - "autoEnableDefaultLabel": "Автоматически включать индексацию для новых рабочих пространств" + "autoEnableDefaultLabel": "Автоматически включать индексацию для новых рабочих пространств", + "sembleProvider": "Semble - Локальный" }, "autoApprove": { "toggleShortcut": "Вы можете настроить глобальное сочетание клавиш для этого параметра в настройках вашей IDE.", diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 2ed19417e7..d1e10f7c8e 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Durduruluyor...", "workspaceToggleLabel": "Bu çalışma alanı için indekslemeyi etkinleştir", "workspaceDisabledMessage": "İndeksleme yapılandırıldı ancak bu çalışma alanı için etkinleştirilmedi.", - "autoEnableDefaultLabel": "Yeni çalışma alanları için indekslemeyi otomatik etkinleştir" + "autoEnableDefaultLabel": "Yeni çalışma alanları için indekslemeyi 
otomatik etkinleştir", + "sembleProvider": "Semble - Yerel" }, "autoApprove": { "toggleShortcut": "IDE tercihlerinizde bu ayar için genel bir kısayol yapılandırabilirsiniz.", diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index e32beeff3e..2482a97222 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "Đang dừng...", "workspaceToggleLabel": "Bật lập chỉ mục cho không gian làm việc này", "workspaceDisabledMessage": "Lập chỉ mục đã được cấu hình nhưng chưa được bật cho không gian làm việc này.", - "autoEnableDefaultLabel": "Tự động bật lập chỉ mục cho không gian làm việc mới" + "autoEnableDefaultLabel": "Tự động bật lập chỉ mục cho không gian làm việc mới", + "sembleProvider": "Semble - Cục bộ" }, "autoApprove": { "toggleShortcut": "Bạn có thể định cấu hình một phím tắt chung cho cài đặt này trong tùy chọn IDE của bạn.", diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index fb67bb89c0..5cb9cdd189 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -213,7 +213,8 @@ "stoppingButton": "正在停止...", "workspaceToggleLabel": "为此工作区启用索引", "workspaceDisabledMessage": "索引已配置,但尚未为此工作区启用。", - "autoEnableDefaultLabel": "自动为新工作区启用索引" + "autoEnableDefaultLabel": "自动为新工作区启用索引", + "sembleProvider": "Semble - 本地" }, "autoApprove": { "toggleShortcut": "您可以在 IDE 首选项中为此设置配置全局快捷方式。", diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index f0d4725cd2..f1b7806cfc 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -228,7 +228,8 @@ "stoppingButton": "正在停止...", "workspaceToggleLabel": "為此工作區啟用索引", "workspaceDisabledMessage": "索引已設定,但尚未為此工作區啟用。", - "autoEnableDefaultLabel": "自動為新工作區啟用索引" + 
"autoEnableDefaultLabel": "自動為新工作區啟用索引", + "sembleProvider": "Semble - 本機" }, "autoApprove": { "description": "無需詢問許可即可執行下列動作。請僅在您完全信任且了解安全風險的情況下啟用此功能。",