diff --git a/.changeset/rag-context-budget.md b/.changeset/rag-context-budget.md
new file mode 100644
index 0000000..ece48d4
--- /dev/null
+++ b/.changeset/rag-context-budget.md
@@ -0,0 +1,13 @@
+---
+"think-app": minor
+---
+
+Maximize RAG context utilization with a dynamic budget system and improve chat UX
+
+- Add dynamic context budget that scales with model context window size
+- Widen distance thresholds proportionally when budget allows more content
+- Include AI-generated summaries as context overview/fallback for each memory
+- Subtract page content from budget in browser extension chat to prevent overflow
+- Rewrite system prompt for better grounding and natural source attribution
+- Fix missing bullet point markers in chat message markdown rendering
+- Remove hover blur effect from assistant chat message bubbles
diff --git a/app/src/components/ChatMessage.tsx b/app/src/components/ChatMessage.tsx
index be9d98b..d7bf237 100644
--- a/app/src/components/ChatMessage.tsx
+++ b/app/src/components/ChatMessage.tsx
@@ -42,7 +42,7 @@ export function ChatMessage({ message }: ChatMessageProps) {
             "p-4 rounded-2xl",
             isUser
               ? "bg-primary text-primary-foreground"
-              : cn(glass.base, glass.hover)
+              : glass.base
           )}
         >
           {isUser ? (
diff --git a/app/src/index.css b/app/src/index.css
index 8eb0d0d..549b295 100644
--- a/app/src/index.css
+++ b/app/src/index.css
@@ -153,9 +153,15 @@
   .chat-prose p:last-child {
     margin-bottom: 0;
   }
-  .chat-prose ul, .chat-prose ol {
+  .chat-prose ul {
     margin: 0.5em 0;
     padding-left: 1.5em;
+    list-style-type: disc;
+  }
+  .chat-prose ol {
+    margin: 0.5em 0;
+    padding-left: 1.5em;
+    list-style-type: decimal;
   }
   .chat-prose li {
     margin: 0.25em 0;
diff --git a/backend/app/native_messaging.py b/backend/app/native_messaging.py
index 349ef64..a650b77 100644
--- a/backend/app/native_messaging.py
+++ b/backend/app/native_messaging.py
@@ -14,11 +14,13 @@
 from .services.embeddings import (
     get_embedding, get_current_embedding_model,
     filter_memories_dynamically, format_memories_as_context,
+    compute_context_budget,
 )
 from .services.ai import (
-    chat, process_memory_async, process_conversation_title_async,
+    chat, get_model, process_memory_async, process_conversation_title_async,
     maybe_rewrite_query, generate_followup_suggestions,
 )
+from .models_info import get_context_window
 from .services.query import preprocess_query, extract_keywords, is_special_prompt, execute_special_handler
 from .events import event_manager, MemoryEvent, EventType
 
@@ -283,6 +285,14 @@ async def _chat_message(self, params: dict) -> dict:
         sources = []
         return_page_summary = None
 
+        # Compute context budget based on model and conversation history
+        model = get_model()
+        context_window = get_context_window(model)
+        # Account for page content that will also consume context space
+        page_content_chars = min(len(page_content), 8000) if page_content else 0
+        context_budget = compute_context_budget(context_window, history)
+        context_budget = max(2000, context_budget - page_content_chars)
+
         # Generate page summary for memory search (only on first message)
         if page_content and not page_summary:
             page_summary = await self._generate_page_summary(page_content, page_title)
@@ -333,7 +343,7 @@ async def _chat_message(self, params: dict) -> dict:
                     # Use model-specific thresholds for filtering
                     embedding_model = get_current_embedding_model()
                     filtered_memories = filter_memories_dynamically(
-                        memories, embedding_model=embedding_model
+                        memories, embedding_model=embedding_model, context_budget_chars=context_budget
                     )
 
                     # Build sources list for the response
@@ -349,7 +359,7 @@ async def _chat_message(self, params: dict) -> dict:
                     ]
 
                     # Format memories as context
-                    memories_context = format_memories_as_context(filtered_memories)
+                    memories_context = format_memories_as_context(filtered_memories, max_chars=context_budget)
                     if memories_context:
                         context_parts.append(memories_context)
 
diff --git a/backend/app/routes/chat.py b/backend/app/routes/chat.py
index d00fc4f..5112182 100644
--- a/backend/app/routes/chat.py
+++ b/backend/app/routes/chat.py
@@ -14,7 +14,7 @@
 from ..services.ai.query_rewriting import maybe_rewrite_query
 from ..services.ai.suggestions import get_quick_prompts, generate_followup_suggestions
 from ..services.query.special_handlers import is_special_prompt, execute_special_handler
-from ..services.embeddings.filtering import filter_memories_dynamically, format_memories_as_context
+from ..services.embeddings.filtering import filter_memories_dynamically, format_memories_as_context, compute_context_budget
 from ..db.search import search_similar_memories
 from ..schemas import ChatRequest
 from .. import config
@@ -66,6 +66,11 @@ async def _retrieve_context(
     sources = []
     attached_context = ""
 
+    # Compute context budget based on model and conversation history
+    model = get_model()
+    context_window = get_context_window(model)
+    total_budget = compute_context_budget(context_window, history)
+
     # Handle explicitly attached memories first
     if attached_memory_ids:
         attached_memories = []
@@ -80,7 +85,9 @@ async def _retrieve_context(
                 })
 
         if attached_memories:
-            attached_context = "## User's Selected Memory:\n" + format_memories_as_context(attached_memories)
+            # Give attached memories up to half the budget; RAG gets the other half
+            attached_budget = total_budget // 2
+            attached_context = "## User's Selected Memory:\n" + format_memories_as_context(attached_memories, max_chars=attached_budget)
             logger.info(f"Using {len(attached_memories)} attached memories as context")
 
     # Skip RAG for very short messages (< 10 chars) or when explicitly disabled
@@ -118,9 +125,13 @@ async def _retrieve_context(
             )
 
             if similar_memories:
+                # RAG budget: half if attached memories exist, full otherwise
+                rag_budget = total_budget // 2 if attached_context else total_budget
                 # Filter using dynamic threshold with model-specific thresholds
-                filtered_memories = filter_memories_dynamically(similar_memories, embedding_model=embedding_model)
-                context = format_memories_as_context(filtered_memories)
+                filtered_memories = filter_memories_dynamically(
+                    similar_memories, embedding_model=embedding_model, context_budget_chars=rag_budget
+                )
+                context = format_memories_as_context(filtered_memories, max_chars=rag_budget)
                 # Build sources list from filtered memories, avoiding duplicates with attached
                 attached_ids = {s["id"] for s in sources}
                 for m in filtered_memories:
diff --git a/backend/app/services/ai/client.py b/backend/app/services/ai/client.py
index fc17a1e..816a860 100644
--- a/backend/app/services/ai/client.py
+++ b/backend/app/services/ai/client.py
@@ -5,11 +5,15 @@
 
 
 # Custom system prompt for Think
-SYSTEM_PROMPT = """You are Think, a friendly personal assistant with access to the user's saved memories and notes. You help them recall information, answer questions, and have natural conversations.
-
-When context from their memories is provided, use it naturally to inform your responses without explicitly mentioning "your saved article" or "your memories" - just incorporate the knowledge seamlessly.
-
-Keep responses conversational and concise. Be helpful and warm, like a knowledgeable friend."""
+SYSTEM_PROMPT = """You are Think, a helpful personal assistant. You help users recall and explore information from their saved content. 
+The content can be owned by the user (like notes and voice memos) or from third-party sources (like web pages, videos, and audio). 
+You use this information to answer questions, provide summaries, and assist with tasks.
+
+Guidelines:
+- Ground your answers in the provided context. If it doesn't contain enough information to answer, say so rather than guessing.
+- Be conversational and concise.
+- You can naturally reference sources (e.g. "from a saved article", "in one of your videos"), but never output the raw type tags like [web] or [video] in your responses.
+"""
 
 
 async def get_client() -> AsyncOpenAI:
diff --git a/backend/app/services/embeddings/__init__.py b/backend/app/services/embeddings/__init__.py
index d6bd4ed..d703b0d 100644
--- a/backend/app/services/embeddings/__init__.py
+++ b/backend/app/services/embeddings/__init__.py
@@ -1,4 +1,4 @@
 """Vector Search & Similarity services."""
 from .client import get_embedding, cosine_similarity, get_current_embedding_model
-from .filtering import filter_memories_dynamically, format_memories_as_context
+from .filtering import filter_memories_dynamically, format_memories_as_context, compute_context_budget
 from .jobs import job_manager, reembed_worker, JobStatus
diff --git a/backend/app/services/embeddings/filtering.py b/backend/app/services/embeddings/filtering.py
index d7a1282..e95959f 100644
--- a/backend/app/services/embeddings/filtering.py
+++ b/backend/app/services/embeddings/filtering.py
@@ -1,6 +1,7 @@
 """Shared memory filtering logic for RAG retrieval."""
 
 import logging
+from datetime import datetime
 
 logger = logging.getLogger(__name__)
 
@@ -14,9 +15,56 @@
 }
 DEFAULT_THRESHOLDS = {"excellent": 0.25, "good": 0.35, "cutoff": 0.45}
 
+# Budget constants
+BUDGET_FLOOR_CHARS = 2000
+BUDGET_CEILING_CHARS = 60000
+DEFAULT_BUDGET_CHARS = 8000
+SYSTEM_PROMPT_CHARS_ESTIMATE = 500
+RESPONSE_RESERVE_TOKENS = 1024
+CHARS_PER_TOKEN = 4
+
+
+def compute_context_budget(
+    context_window_tokens: int,
+    history: list[dict],
+    system_prompt_chars: int = SYSTEM_PROMPT_CHARS_ESTIMATE,
+    response_reserve_tokens: int = RESPONSE_RESERVE_TOKENS,
+) -> int:
+    """Compute how many characters of memory context we can fit.
+
+    Estimates token usage for system prompt, conversation history, and
+    response reserve, then returns the remaining space as characters.
+    Result is clamped between BUDGET_FLOOR_CHARS and BUDGET_CEILING_CHARS.
+    """
+    # Estimate tokens used by history. A None history, or a message whose
+    # "content" is missing or None, counts as zero characters instead of raising.
+    history_chars = sum(len(m.get("content") or "") for m in history or [])
+    history_tokens = history_chars // CHARS_PER_TOKEN
+
+    # Estimate tokens used by system prompt
+    system_tokens = system_prompt_chars // CHARS_PER_TOKEN
+
+    # Reserve at least response_reserve_tokens or 10% of window, whichever is larger
+    effective_reserve = max(response_reserve_tokens, context_window_tokens // 10)
+
+    # Available tokens for context (can go negative; the clamp below absorbs that)
+    available_tokens = context_window_tokens - system_tokens - history_tokens - effective_reserve
+    # Convert to chars
+    available_chars = available_tokens * CHARS_PER_TOKEN
+
+    budget = max(BUDGET_FLOOR_CHARS, min(BUDGET_CEILING_CHARS, available_chars))
+    logger.info(
+        f"Context budget: {budget} chars "
+        f"(window={context_window_tokens}, history_tokens={history_tokens}, "
+        f"available_tokens={available_tokens})"
+    )
+    return budget
+
 
 def filter_memories_dynamically(
-    memories: list[dict], max_results: int = 5, embedding_model: str | None = None
+    memories: list[dict],
+    embedding_model: str | None = None,
+    context_budget_chars: int = DEFAULT_BUDGET_CHARS,
 ) -> list[dict]:
     """Filter memories using distance-based relevance.
 
@@ -26,6 +74,7 @@ def filter_memories_dynamically(
     - All match types (hybrid/keyword/vector) must pass distance check
     - Adaptive limits based on best match quality
     - Use model-specific thresholds when available
+    - Scale max_results caps and widen the distance threshold based on context_budget_chars
     """
     if not memories:
         logger.info("No memories to filter")
@@ -38,17 +87,20 @@ def filter_memories_dynamically(
         else DEFAULT_THRESHOLDS
     )
 
+    # Budget scaling factor: how much bigger is our budget vs the 8k default?
+    budget_scale = context_budget_chars / DEFAULT_BUDGET_CHARS
+
     # Sort by distance (lowest/best first)
     sorted_memories = sorted(memories, key=lambda m: m.get("distance") or 999)
 
     # Log what we're working with
-    logger.info(f"Filtering {len(sorted_memories)} memories (model: {embedding_model})")
+    logger.info(f"Filtering {len(sorted_memories)} memories (model: {embedding_model}, budget_scale: {budget_scale:.1f}x)")
     for m in sorted_memories[:5]:
         dist = m.get("distance")
         dist_str = f"{dist:.3f}" if dist is not None else "N/A"
         rrf = m.get("rrf_score") or 0
         rrf_str = f"{rrf:.4f}" if rrf else "N/A"
-        logger.info(
+        logger.debug(
             f"  [{m.get('match_type', '?')}] {m.get('title', '')[:50]}... dist={dist_str} rrf={rrf_str}"
         )
 
@@ -62,18 +114,24 @@ def filter_memories_dynamically(
 
     # Calculate dynamic threshold: include results within range of best
     # Tighter range for better matches, looser for weaker ones
+    # Scale max_results based on budget (capped at reasonable limits)
+    # Widen acceptance range when we have more budget to fill:
+    # at 2x budget add +0.01, at 3x add +0.02, capped at +0.03 (reached at 4x)
+    budget_bonus = min(0.03, max(0.0, (budget_scale - 1.0) * 0.01))
+
     if best_distance < thresholds["excellent"]:
-        # Excellent match: include results within +0.08
-        threshold = best_distance + 0.08
-        max_results = 5
+        threshold = best_distance + 0.08 + budget_bonus
+        max_results = min(10, max(5, int(5 * budget_scale)))
     elif best_distance < thresholds["good"]:
-        # Good match: include results within +0.06
-        threshold = best_distance + 0.06
-        max_results = 3
+        threshold = best_distance + 0.06 + budget_bonus
+        max_results = min(8, max(3, int(3 * budget_scale)))
     else:
-        # Marginal match: only include very close results
-        threshold = best_distance + 0.04
-        max_results = 2
+        # Marginal match: half the bonus to stay conservative
+        threshold = best_distance + 0.04 + (budget_bonus * 0.5)
+        max_results = min(4, max(2, int(2 * budget_scale)))
+
+    # Never exceed the model's absolute cutoff
+    threshold = min(threshold, thresholds["cutoff"])
 
     logger.info(
         f"Best distance: {best_distance:.3f}, threshold: {threshold:.3f}, max: {max_results}"
@@ -88,44 +146,137 @@ def filter_memories_dynamically(
             continue
 
         if distance <= threshold:
-            logger.info(
+            logger.debug(
                 f"  Including [{match_type}] (dist={distance:.3f}): {m.get('title', '')[:30]}"
             )
             filtered.append(m)
         else:
-            logger.info(
+            logger.debug(
                 f"  Excluding [{match_type}] (dist={distance:.3f} > {threshold:.3f}): {m.get('title', '')[:30]}"
             )
 
     result = filtered[:max_results]
-    logger.info(f"Filtered to {len(result)} memories")
+    logger.info(
+        f"Filter stats: {{"
+        f"input: {len(memories)}, output: {len(result)}, "
+        f"best_distance: {best_distance:.4f}, threshold: {threshold:.4f}, "
+        f"budget_scale: {budget_scale:.2f}, budget_bonus: {budget_bonus:.4f}, "
+        f"max_results_cap: {max_results}, model: {embedding_model or 'default'}"
+        f"}}"
+    )
     return result
 
 
-def format_memories_as_context(memories: list[dict], max_chars: int = 8000) -> str:
+def _format_metadata_line(memory: dict) -> str:
+    """Return a bracketed tag such as "[web | saved 2024-01-31]" for a memory.
+
+    The type comes from "type" or "memory_type" (default "note"); the date
+    comes from "created_at" or "date" and is dropped when absent or unparseable.
+    """
+    kind = memory.get("type") or memory.get("memory_type") or "note"
+    raw = memory.get("created_at") or memory.get("date")
+    stamp = None
+    if raw and isinstance(raw, str):
+        try:
+            # Normalise a trailing "Z" to an explicit UTC offset before parsing
+            stamp = datetime.fromisoformat(raw.replace("Z", "+00:00"))
+        except (ValueError, TypeError):
+            stamp = None
+    elif raw and isinstance(raw, datetime):
+        stamp = raw
+    segments = [kind]
+    if stamp is not None:
+        segments.append(f"saved {stamp.strftime('%Y-%m-%d')}")
+    return f"[{' | '.join(segments)}]"
+
+
+def format_memories_as_context(memories: list[dict], max_chars: int = DEFAULT_BUDGET_CHARS) -> str:
     """Format retrieved memories into a context string for the LLM.
 
     Expects memories to be pre-filtered by filter_memories_dynamically.
-    Uses more generous limits to give LLM more context for valuable answers.
+    Dynamically allocates space per memory and includes AI-generated
+    summaries as overview (for long content) or fallback (when truncated).
     """
     if not memories:
         return ""
 
+    num_memories = len(memories)
+    # Per-memory char budget (floor of 2000 to keep each entry meaningful)
+    per_memory_limit = max(2000, max_chars // num_memories)
+
     context_parts = []
     total_chars = 0
+    summaries_used = 0
 
     for memory in memories:
         title = memory.get("title", "Untitled")
         content = memory.get("content", "")
+        summary = memory.get("summary") or ""
+        metadata = _format_metadata_line(memory)
+
+        # Build the entry
+        header = f"### {title}\n{metadata}"
+        header_len = len(header) + 1  # +1 for newline
 
-        # Truncate content if too long (increased from 800 to 2000 for richer context)
-        if len(content) > 2000:
-            content = content[:2000] + "..."
+        # Remaining space for this memory's body
+        body_budget = min(per_memory_limit, max_chars - total_chars) - header_len
+        if body_budget <= 0:
+            # Try to squeeze in just the summary as a last-resort entry
+            if summary:
+                summary_entry = f"{header}\n{summary}"
+                if total_chars + len(summary_entry) <= max_chars:
+                    context_parts.append(summary_entry)
+                    total_chars += len(summary_entry)
+            break
+
+        # Decide what body content to include
+        content_needs_truncation = len(content) > body_budget
+        summary_included = False
+
+        if summary and not content_needs_truncation:
+            # Content fits fully: include summary as overview + full content
+            body = f"{summary}\n\n{content}"
+            if len(body) > body_budget:
+                # Summary + full content exceeds budget, just use content
+                body = content
+            else:
+                summary_included = True
+        elif content_needs_truncation and summary:
+            # Content must be truncated: include summary as fallback + truncated content
+            # Reserve space for summary line
+            summary_line = f"{summary}\n\n"
+            content_budget = body_budget - len(summary_line)
+            if content_budget > 200:
+                body = f"{summary_line}{content[:content_budget]}..."
+                summary_included = True
+            else:
+                # Not enough room for both; use summary alone
+                body = summary
+                summary_included = True
+        else:
+            # No summary available: just use content directly
+            if len(content) > body_budget:
+                body = content[:body_budget] + "..."
+            else:
+                body = content
 
-        entry = f"### {title}\n{content}"
+        if summary_included:
+            summaries_used += 1
 
-        # Check if adding this would exceed limit
+        entry = f"{header}\n{body}"
+
+        # Final check against overall budget (entry text only; the heading and join separators are not counted)
         if total_chars + len(entry) > max_chars:
+            # Try a trimmed version
+            remaining = max_chars - total_chars
+            if remaining > header_len + 200:
+                trim_budget = remaining - header_len - 1
+                if summary and len(summary) <= trim_budget:
+                    entry = f"{header}\n{summary}"
+                else:
+                    entry = f"{header}\n{content[:trim_budget]}..."
+                context_parts.append(entry)
+                total_chars += len(entry)
             break
 
         context_parts.append(entry)
@@ -134,4 +285,10 @@ def format_memories_as_context(memories: list[dict], max_chars: int = 8000) -> s
     if not context_parts:
         return ""
 
-    return "## Relevant Memories:\n\n" + "\n\n---\n\n".join(context_parts)
+    result = "## Relevant Memories:\n\n" + "\n\n---\n\n".join(context_parts)
+    utilization_pct = round((len(result) / max_chars) * 100, 1) if max_chars > 0 else 0
+    logger.info(
+        f"Formatted {len(context_parts)} memories into {len(result)} chars "
+        f"(budget: {max_chars}, utilization: {utilization_pct}%, summaries: {summaries_used}/{len(context_parts)})"
+    )
+    return result