-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapp.py
More file actions
328 lines (269 loc) · 12.7 KB
/
app.py
File metadata and controls
328 lines (269 loc) · 12.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
import asyncio
import json
import os
import pdb
import traceback
from typing import Dict, List, Tuple

import gradio as gr
from dotenv import load_dotenv

from agents import gen_trace_id
from frameworks.big_idea_framework import big_idea_sections
from frameworks.specific_idea_framework import specific_idea_sections
from section_agent import SectionResearchManager
from summarize_agent import generate_final_report
# Load environment variables from .env; override=True lets .env values win
# over anything already set in the process environment.
load_dotenv(override=True)

# ------------- Framework → Section descriptors (loaded from framework files) -------------

# ------------- Shared run params -------------
# Default research knobs applied to every section run (see build_section_details).
DEFAULT_RUN_PARAMS = {
    "depth": "standard",    # research depth preset
    "lookback_days": 540,   # how far back (in days) sources may date
    "langs": ["en"],        # source languages to include
    "k_per_query": 6,       # results fetched per search query
    "max_queries": 12       # cap on generated search queries per section
}
# ------------- Helper: Make full section_details for SectionResearchManager -------------
def build_section_details(framework: str, topic: str, raw_desc: Dict, run_params: Dict) -> Dict:
    """Assemble the section_details payload consumed by SectionResearchManager.

    Substitutes the concrete topic into the descriptor's example queries, then
    bundles the descriptor with the framework name, topic, and run params.
    """
    # Substitute the concrete topic for every <TOPIC> placeholder.
    substituted_queries = []
    for query in raw_desc.get("example_queries", []):
        substituted_queries.append(query.replace("<TOPIC>", topic))

    descriptor = {
        "section": raw_desc["section"],
        "description": raw_desc["description"],
        "facets": raw_desc["facets"],
        "example_queries": substituted_queries,
    }
    return {
        "framework": framework,
        "topic_or_idea": topic,
        "section_descriptor": descriptor,
        "run_params": run_params,
    }
# ------------- Orchestrator (parallel) with streaming logs -------------
async def run_framework_parallel_stream(framework: str, topic: str):
    """
    Async generator that yields (chat_text, partial_results_json) tuples as the run progresses.

    Launches one SectionResearchManager per framework section in parallel,
    relays per-section progress messages as they arrive on a shared queue,
    then synthesizes everything via generate_final_report. The report dict is
    attached only to the very last yielded tuple; every earlier tuple carries
    None in the second slot.
    """
    if framework not in ("big-idea", "specific-idea"):
        yield (f"❌ Unknown framework: {framework}", None)
        return

    section_defs = big_idea_sections() if framework == "big-idea" else specific_idea_sections()
    trace_id = gen_trace_id()
    trace_name = f"{framework} {topic}"

    # Initial setup message with a rough time estimate for the user.
    num_sections = len(section_defs)
    framework_name = "Big-Idea Exploration" if framework == "big-idea" else "Specific-Idea Validation"
    est_time_min = num_sections * 1.5  # ~1.5 min per section at best
    est_time_max = num_sections * 3  # ~3 min per section when self-healing iterates
    yield (f"""🚀 **Starting {framework_name}**
**Topic**: {topic}
**Sections to analyze**: {num_sections}
**Estimated time**: {est_time_min}-{est_time_max} minutes (depends on complexity)
Each section goes through a 7-step research pipeline:
1. Complexity assessment
2. Query generation
3. Web research
4. Deep analysis
5. Quality check
6. Self-healing (if gaps found)
7. Final synthesis
_Research is running in parallel across all sections..._
""", None)

    # Use asyncio.Queue to collect progress messages from all sections
    progress_queue = asyncio.Queue()

    # Create a progress callback that adds messages to the queue
    async def progress_callback(message: str):
        await progress_queue.put(message)

    # Show section overview
    yield (f"\n📋 **Research Sections:**", None)
    for sec_name, desc in section_defs.items():
        display_name = desc.get("display_name", sec_name.replace("_", " ").title())
        description = desc.get("description", "")
        # Truncate long descriptions so the overview stays scannable.
        section_desc = description[:100] + "..." if len(description) > 100 else description
        yield (f"• **{display_name}**: {section_desc}", None)
    yield (f"\n⏳ **Launching parallel research agents...**\n", None)

    # Kick off all sections in parallel
    tasks = []
    mgrs = {}
    for sec_name, desc in section_defs.items():
        details = build_section_details(framework, topic, desc, DEFAULT_RUN_PARAMS)
        mgr = SectionResearchManager(sec_name, enable_critic=False)
        mgrs[sec_name] = mgr
        tasks.append(asyncio.create_task(mgr.run_section_manager(trace_id, details, trace_name, progress_callback)))

    # Monitor both task completion and progress messages
    active_tasks = set(tasks)
    section_results = {}
    while active_tasks or not progress_queue.empty():
        # Check for completed tasks
        done_tasks = {task for task in active_tasks if task.done()}
        for task in done_tasks:
            active_tasks.remove(task)
            try:
                res = await task
                sec = res["section"]
                brief = res["section_brief"]
                section_results[sec] = res
                # stream per-section done
                conf = brief.get("confidence", 0.0)
                hl_count = len(brief.get("highlights", []))
                # Get display name and color for completion message
                display_name = res.get("display_name", sec.replace("_", " ").title())
                section_color = res.get("section_color", "#95a5a6")
                yield (f'<span style="color: {section_color}; font-weight: bold;">✅ {display_name} complete — {hl_count} insights extracted (confidence: {conf:.0%})</span>', None)
            except Exception as e:
                # Log the full traceback server-side so failures stay debuggable;
                # the UI only gets a short one-line notice.
                traceback.print_exc()
                yield (f"⚠️ A section failed: {e}", None)
        # Check for progress messages
        try:
            while True:
                message = progress_queue.get_nowait()
                yield (message, None)
        except asyncio.QueueEmpty:
            pass
        # Brief sleep to prevent busy waiting
        if active_tasks:
            await asyncio.sleep(0.1)

    # Generate comprehensive final report using summarize_agent
    yield (f"\n🎯 **All {num_sections} sections complete!**", None)
    yield ("🔄 Synthesizing final report with cross-section fact verification and deduplication...", None)
    yield ("⏱️ _This may take 1-2 minutes..._", None)
    report_data = await generate_final_report(framework, topic, section_results, trace_id, trace_name)
    # Format the final output - this will be handled by the improved UI
    yield ("\n✨ **Research Complete!** Your comprehensive report is ready below.", report_data)
# ------------- Gradio UI -------------
# Custom stylesheet: fixed-height chat pane, monospace JSON view, and a
# light card background for the metadata panel.
CSS = """
#chat {height: 400px}
.json-display {font-family: 'Monaco', 'Consolas', monospace; font-size: 12px;}
.metadata-display {background: #f8f9fa; padding: 10px; border-radius: 5px;}
"""
with gr.Blocks(css=CSS, fill_height=True, theme=gr.themes.Soft()) as demo:
    # Header / framework explainer shown above the controls.
    gr.Markdown("""## 🔎 ReallyDeepResearch
**Deep, multi-agent research system** — Parallel exploration with self-healing quality checks
Choose your framework:
- 🌐 **Big-Idea**: Market landscape, tech stack, research frontier, opportunities
- 🎯 **Specific-Idea**: Problem validation, ROI, competition, GTM, risks
_⏱️ Research typically takes 8-10 minutes depending on complexity_
""")
    # Topic input plus one launch button per framework.
    with gr.Row():
        topic_in = gr.Textbox(
            label="Topic / Idea",
            placeholder="e.g., AI music • or • Agents to clear IT backlog",
            lines=1
        )
    with gr.Row():
        btn_big = gr.Button("🌐 Run Big-Idea Exploration", variant="primary")
        btn_specific = gr.Button("🎯 Run Specific-Idea Exploration")
    # Progress chat at the top
    chat = gr.Chatbot(label="🔄 Research Progress", height=400, elem_id="chat")
    # Organized results in tabs
    with gr.Tabs():
        with gr.TabItem("📄 Executive Report"):
            narrative_display = gr.Markdown(
                label="Executive Summary",
                value="Research results will appear here...",
                elem_classes=["narrative-display"]
            )
            metadata_display = gr.Markdown(
                label="Research Statistics",
                value="",
                elem_classes=["metadata-display"]
            )
        with gr.TabItem("📊 Structured Data"):
            json_display = gr.Code(
                label="Section Analysis (JSON)",
                language="json",
                value="{}",
                elem_classes=["json-display"]
            )
        with gr.TabItem("💾 Export"):
            # Invisible JSON component that caches the latest full report so the
            # download buttons always have data to serve.
            download_data = gr.JSON(label="Full Research Data", visible=False)
            gr.Markdown("**Export Options:**")
            gr.Markdown("_Click a button below to download your research report._")
            with gr.Row():
                export_json_btn = gr.DownloadButton("📥 Download JSON", variant="primary")
                export_md_btn = gr.DownloadButton("📝 Download Markdown", variant="secondary")
    # Hidden state for messages and data
    state_msgs = gr.State([])  # List[Tuple[str,str]]
async def _start_run(framework: str, topic: str, msgs: List[Tuple[str, str]]):
    """Stream one research run into the UI.

    Gradio generator callback: each yield updates, in order, (chat,
    json_display, narrative_display, metadata_display, download_data,
    state_msgs). The chat transcript grows with every orchestrator message;
    the report panes fill in only once a report dict arrives.
    """
    # NOTE(review): messages are appended as ("user", ...) / ("assistant", ...)
    # role-style tuples, while the classic gr.Chatbot tuple format is
    # (user_msg, bot_msg) pairs — confirm the installed Gradio version
    # renders these as intended.
    if not topic or not topic.strip():
        msgs = msgs + [("user", f"{framework}"), ("assistant", "❌ Please enter a topic/idea first.")]
        # Clear all outputs and return
        yield msgs, "", "", "", {}, msgs
        return
    # Add user's "start" message
    msgs = msgs + [("user", f"{framework}: {topic}")]
    # Clear previous outputs
    current_json = ""
    current_narrative = ""
    current_metadata = ""
    # Stream updates as they arrive
    async for text, report_data in run_framework_parallel_stream(framework, topic.strip()):
        msgs = msgs + [("assistant", text)]
        if report_data is not None:
            # Extract different parts of the report
            if isinstance(report_data, dict):
                # Format structured summary as JSON
                structured_summary = report_data.get("structured_summary", {})
                current_json = json.dumps(structured_summary, indent=2, ensure_ascii=False)
                # Extract narrative report
                current_narrative = report_data.get("narrative_report", "")
                # Format metadata
                metadata = report_data.get("metadata", {})
                current_metadata = f"""**Research Metadata:**
- Total Facts: {metadata.get('total_facts', 0)}
- Average Confidence: {metadata.get('avg_confidence', 0):.2f}
- Sections Analyzed: {metadata.get('sections_count', 0)}"""
        # Intermediate yields pass {} for download_data until a report exists.
        yield msgs, current_json, current_narrative, current_metadata, report_data or {}, msgs
    # Final yield to ensure last state is displayed
    # (report_data still holds whatever the orchestrator attached to its last yield)
    yield msgs, current_json, current_narrative, current_metadata, report_data or {}, msgs
# Download functions
def download_json(report_data):
    """Write the full research report to a temp .json file and return its path.

    Serves a small placeholder payload when no report is available yet, so
    the DownloadButton always has a file to deliver. delete=False is required
    because Gradio reads the file after this function returns.
    """
    import tempfile  # local import keeps the module-level import block unchanged

    if not report_data or not isinstance(report_data, dict):
        # No research data yet — serve a placeholder file instead of failing.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False,
                                         encoding='utf-8') as f:
            json.dump({"error": "No research data available yet"}, f, indent=2)
        return f.name

    # Create temporary file for JSON download
    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False,
                                     encoding='utf-8') as f:
        json.dump(report_data, f, indent=2, ensure_ascii=False)
    return f.name
def download_markdown(report_data):
    """Write the narrative report to a temp .md file and return its path.

    Serves a placeholder document when no report is available yet.
    delete=False is required because Gradio reads the file after return.
    """
    import tempfile  # local import keeps the module-level import block unchanged

    if not report_data or not isinstance(report_data, dict):
        # No research data yet — serve a placeholder file instead of failing.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False,
                                         encoding='utf-8') as f:
            f.write("# No research data available yet\n\nPlease run a research query first.")
        return f.name

    # Get the narrative report (fall back to a stub header if missing).
    narrative = report_data.get("narrative_report", "# No report available")
    # Create temporary file for Markdown download
    with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False,
                                     encoding='utf-8') as f:
        f.write(narrative)
    return f.name
# Button handlers (streaming)
# Both buttons drive the same generator; gr.State(...) pins the framework arg,
# and queue=True enables streaming of intermediate yields to the UI.
btn_big.click(
    _start_run,
    inputs=[gr.State("big-idea"), topic_in, state_msgs],
    outputs=[chat, json_display, narrative_display, metadata_display, download_data, state_msgs],
    queue=True
)
btn_specific.click(
    _start_run,
    inputs=[gr.State("specific-idea"), topic_in, state_msgs],
    outputs=[chat, json_display, narrative_display, metadata_display, download_data, state_msgs],
    queue=True
)
# Download button handlers - DownloadButton automatically triggers download when function returns a file path
export_json_btn.click(
    fn=download_json,
    inputs=[download_data],
    outputs=export_json_btn
)
export_md_btn.click(
    fn=download_markdown,
    inputs=[download_data],
    outputs=export_md_btn
)
if __name__ == "__main__":
    # Launch Gradio
    demo.queue()  # enables concurrency/streaming
    # Bind to all interfaces; port comes from the PORT env var (default 7860).
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))