init devika repo
commit f0b94ab9bd
BIN
.assets/devika-avatar.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 911 KiB |
BIN
.assets/devika-pygame-demo.mp4
Normal file
Binary file not shown.
BIN
.assets/devika-screenshot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 747 KiB |

163
.gitignore
vendored
Normal file

@@ -0,0 +1,163 @@

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
config.toml

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

notes.md
data/

251
ARCHITECTURE.md
Normal file

@@ -0,0 +1,251 @@

# Devika Architecture

Devika is an advanced AI software engineer that can understand high-level human instructions, break them down into steps, research relevant information, and write code to achieve a given objective. This document provides a detailed technical overview of Devika's system architecture and how the various components work together.

## Table of Contents

1. [Overview](#overview)
2. [Agent Core](#agent-core)
3. [Agents](#agents)
   - [Planner](#planner)
   - [Researcher](#researcher)
   - [Coder](#coder)
   - [Action](#action)
   - [Runner](#runner)
   - [Feature](#feature)
   - [Patcher](#patcher)
   - [Reporter](#reporter)
   - [Decision](#decision)
4. [Language Models](#language-models)
5. [Browser Interaction](#browser-interaction)
6. [Project Management](#project-management)
7. [Agent State Management](#agent-state-management)
8. [Services](#services)
9. [Utilities](#utilities)
10. [Conclusion](#conclusion)

## Overview

At a high level, Devika consists of the following key components:

- **Agent Core**: Orchestrates the overall AI planning, reasoning and execution process. Communicates with various sub-agents.
- **Agents**: Specialized sub-agents that handle specific tasks like planning, research, coding, patching, reporting etc.
- **Language Models**: Leverages large language models (LLMs) like Claude, GPT-4, GPT-3 for natural language understanding and generation.
- **Browser Interaction**: Enables web browsing, information gathering, and interaction with web elements.
- **Project Management**: Handles organization and persistence of project-related data.
- **Agent State Management**: Tracks and persists the dynamic state of the AI agent across interactions.
- **Services**: Integrations with external services like GitHub, Netlify for enhanced capabilities.
- **Utilities**: Supporting modules for configuration, logging, vector search, PDF generation etc.

Let's dive into each of these components in more detail.

## Agent Core

The `Agent` class serves as the central engine that drives Devika's AI planning and execution loop. Here's how it works:

1. When a user provides a high-level prompt, the `execute` method is invoked on the Agent.
2. The prompt is first passed to the Planner agent to generate a step-by-step plan.
3. The Researcher agent then takes this plan and extracts relevant search queries and context.
4. The Agent performs web searches using the Bing Search API and crawls the top results.
5. The raw crawled content is passed through the Formatter agent to extract clean, relevant information.
6. This researched context, along with the step-by-step plan, is fed to the Coder agent to generate code.
7. The generated code is saved to the project directory on disk.
8. If the user interacts further with a follow-up prompt, the `subsequent_execute` method is invoked.
9. The Action agent determines the appropriate action to take based on the user's message (run code, deploy, write tests, add feature, fix bug, write report etc.)
10. The corresponding specialized agent is invoked to perform the action (Runner, Feature, Patcher, Reporter).
11. Results are communicated back to the user and the project files are updated.

Throughout this process, the Agent Core is responsible for:
- Managing conversation history and project-specific context
- Updating agent state and internal monologue
- Accumulating context keywords across agent prompts
- Emulating the "thinking" process of the AI through timed agent state updates
- Handling special commands through the Decision agent (e.g. git clone, browser interaction session)
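
To make the loop concrete, here is a minimal, self-contained sketch of the flow above. The sub-agent classes below are stubs, and their names and signatures are illustrative assumptions, not Devika's actual API:

```python
# A minimal sketch of the Agent Core loop described above.
# All classes and methods here are stand-ins for illustration only.

class Planner:
    def plan(self, prompt: str) -> list[str]:
        return [f"step 1: analyze '{prompt}'"]  # placeholder plan

class Researcher:
    def queries(self, plan: list[str]) -> list[str]:
        return ["example search query"]  # placeholder search queries

class Coder:
    def code(self, plan: list[str], context: str) -> str:
        return "# generated code"  # placeholder generated code

class Agent:
    def execute(self, prompt: str, project_name: str) -> str:
        plan = Planner().plan(prompt)         # step 2: generate a plan
        queries = Researcher().queries(plan)  # step 3: extract search queries
        context = "; ".join(queries)          # steps 4-5: search + format (stubbed)
        code = Coder().code(plan, context)    # step 6: generate code
        print(f"[{project_name}] saving generated code to disk")  # step 7
        return code

if __name__ == "__main__":
    Agent().execute("build a snake game in pygame", "demo-project")
```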

## Agents

Devika's cognitive abilities are powered by a collection of specialized sub-agents. Each agent is implemented as a separate Python class. Agents communicate with the underlying LLMs through prompt templates defined in Jinja2 format. Key agents include:

### Planner
- Generates a high-level step-by-step plan based on the user's prompt
- Extracts the focus area and provides a summary
- Uses few-shot prompting to provide examples of the expected response format

### Researcher
- Takes the generated plan and extracts relevant search queries
- Ranks and filters queries based on relevance and specificity
- Prompts the user for additional context if required
- Aims to maximize information gain while minimizing the number of searches

### Coder
- Generates code based on the step-by-step plan and researched context
- Segments code into appropriate files and directories
- Includes informative comments and documentation
- Handles a variety of languages and frameworks
- Validates code syntax and style

### Action
- Determines the appropriate action to take based on the user's follow-up prompt
- Maps user intent to a specific action keyword (run, test, deploy, fix, implement, report)
- Provides a human-like confirmation of the action to the user

### Runner
- Executes the written code in a sandboxed environment
- Handles different OS environments (Mac, Linux, Windows)
- Streams command output to the user in real time
- Gracefully handles errors and exceptions

### Feature
- Implements a new feature based on the user's specification
- Modifies existing project files while maintaining code structure and style
- Performs incremental testing to verify the feature works as expected

### Patcher
- Debugs and fixes issues based on the user's description or error message
- Analyzes existing code to identify potential root causes
- Suggests and implements a fix, with an explanation of the changes made

### Reporter
- Generates a comprehensive report summarizing the project
- Includes a high-level overview, technical design, setup instructions, API docs etc.
- Formats the report in a clean, readable structure with a table of contents
- Exports the report as a PDF document

### Decision
- Handles special command-like instructions that don't fit other agents
- Maps commands to specific functions (git clone, browser interaction etc.)
- Executes the corresponding function with the provided arguments

Each agent follows a common pattern:
1. Prepare a prompt by rendering the Jinja2 template with the current context
2. Query the LLM to get a response based on the prompt
3. Validate and parse the LLM's response to extract structured output
4. Perform any additional processing or side effects (e.g. saving to disk)
5. Return the result to the Agent Core for further action

Agents aim to be stateless and idempotent where possible. State and history are managed by the Agent Core and passed into the agents as needed. This allows for a modular, composable design.
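
As an illustration of this shared pattern, here is a minimal sketch using Jinja2. The template text, the parsing logic, and the `query_llm` stub are assumptions; only the render/query/validate/return sequence mirrors the steps above:

```python
# A sketch of the common agent pattern: render template -> query LLM ->
# validate/parse -> return structured output to the Agent Core.
from jinja2 import Template

PROMPT_TEMPLATE = Template("Plan the following task step by step:\n{{ prompt }}")

def query_llm(prompt: str) -> str:
    return "1. Scaffold the project\n2. Implement the feature"  # LLM stand-in

def run_planner(user_prompt: str) -> list[str]:
    prompt = PROMPT_TEMPLATE.render(prompt=user_prompt)  # 1. render with context
    response = query_llm(prompt)                         # 2. query the LLM
    steps = [ln.strip() for ln in response.splitlines() if ln.strip()]  # 3. parse
    return steps                                         # 5. hand back the result

print(run_planner("add a login page"))
```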

## Language Models

Devika's natural language processing capabilities are driven by state-of-the-art LLMs. The `LLM` class provides a unified interface to interact with different language models:

- **Claude** (Anthropic): Claude models like claude-v1.3, claude-instant-v1.0 etc.
- **GPT-4/GPT-3** (OpenAI): Models like gpt-4, gpt-3.5-turbo etc.
- **Self-hosted models** (via [Ollama](https://ollama.com/)): Allows using open-source models in a self-hosted environment

The `LLM` class abstracts out the specifics of each provider's API, allowing agents to interact with the models in a consistent way. It supports:
- Listing available models
- Generating completions based on a prompt
- Tracking and accumulating token usage over time

Choosing the right model for a given use case depends on factors like desired quality, speed, cost etc. The modular design allows swapping out models easily.
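
A hypothetical sketch of such a unified interface is shown below; the provider registry and method names are assumptions, not the actual `LLM` implementation:

```python
# A sketch of a unified LLM interface that dispatches to providers and
# accumulates token usage. Model/provider names follow the lists above.

MODEL_REGISTRY = {
    "claude-v1.3": "anthropic",
    "gpt-3.5-turbo": "openai",
    "llama2": "ollama",
}

class LLM:
    def __init__(self) -> None:
        self.token_usage = 0

    def list_models(self) -> list[str]:
        return list(MODEL_REGISTRY)

    def inference(self, model: str, prompt: str) -> str:
        provider = MODEL_REGISTRY[model]
        # A real implementation would call the provider's client here.
        response = f"[{provider}:{model}] completion"
        self.token_usage += len(prompt.split())  # crude token accounting
        return response

llm = LLM()
print(llm.list_models())
print(llm.inference("claude-v1.3", "Write a haiku about compilers"))
```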

## Browser Interaction

Devika can interact with webpages in an automated fashion to gather information and perform actions. This is powered by the `Browser` and `Crawler` classes.

The `Browser` class uses Playwright to provide high-level web automation primitives:
- Spawning a browser instance (Chromium)
- Navigating to a URL
- Querying DOM elements
- Extracting page content as text, Markdown, PDF etc.
- Taking a screenshot of the page

The `Crawler` class defines an agent that can interact with a webpage based on natural language instructions. It leverages:
- Pre-defined browser actions like scroll, click, type etc.
- A prompt template that provides examples of how to use these actions
- An LLM to determine the best action to take based on the current page content and objective

The `start_interaction` function sets up a loop where:
1. The current page content and objective are passed to the LLM
2. The LLM returns the next best action to take (e.g. "CLICK 12" or "TYPE 7 machine learning")
3. The Crawler executes this action on the live page
4. The process repeats from the updated page state

This allows performing a sequence of actions to achieve a higher-level objective (e.g. research a topic, fill out a form, interact with an app etc.)
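
A minimal sketch of this loop using Playwright's sync API follows; `next_action` is a stand-in for the LLM call, and the element-numbering scheme is an assumption based on the action examples above:

```python
# A sketch of the start_interaction loop: read page -> ask LLM -> act -> repeat.
from playwright.sync_api import sync_playwright

def next_action(page_text: str, objective: str) -> str:
    return "DONE"  # stand-in for the LLM choosing e.g. "CLICK 12"

def start_interaction(url: str, objective: str) -> None:
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto(url)
        while True:
            action = next_action(page.inner_text("body"), objective)
            if action == "DONE":
                break
            verb, _, rest = action.partition(" ")
            if verb == "CLICK":
                pass  # resolve the numbered element, then click it
            elif verb == "TYPE":
                pass  # resolve the element, then type the remaining text
        browser.close()

start_interaction("https://example.com", "find the docs link")
```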

## Project Management

The `ProjectManager` class is responsible for creating, updating and querying projects and their associated metadata. Key functions include:

- Creating a new project and initializing its directory structure
- Deleting a project and its associated files
- Adding a message to a project's conversation history
- Retrieving messages for a given project
- Getting the latest user/AI message in a conversation
- Listing all projects
- Zipping a project's files for export

Project metadata is persisted in a SQLite database using SQLModel. The `Projects` table stores:
- Project name
- JSON-serialized conversation history

This allows the agent to work on multiple projects simultaneously and retain conversation history across sessions.
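
A sketch of this persistence layer with SQLModel is shown below; beyond the two fields listed above, the column names are assumptions:

```python
# A sketch of the Projects table: project name + JSON-serialized messages.
import json
from sqlmodel import Field, Session, SQLModel, create_engine, select

class Projects(SQLModel, table=True):
    id: int | None = Field(default=None, primary_key=True)
    project: str
    message_stack_json: str  # JSON-serialized conversation history

engine = create_engine("sqlite:///projects-demo.db")
SQLModel.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Projects(project="demo", message_stack_json=json.dumps([])))
    session.commit()
    row = session.exec(select(Projects).where(Projects.project == "demo")).first()
    print(json.loads(row.message_stack_json))
```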

## Agent State Management

As the AI agent works on a task, we need to track and display its internal state to the user. The `AgentState` class handles this by providing an interface to:

- Initialize a new agent state
- Add a state to the current sequence of states for a project
- Update the latest state for a project
- Query the latest state or the entire state history for a project
- Mark the agent as active/inactive or the task as completed

Agent state includes information like:
- Current step or action being executed
- Internal monologue reflecting the agent's current "thoughts"
- Browser interactions (URL visited, screenshot)
- Terminal interactions (command executed, output)
- Token usage so far

Like projects, agent states are also persisted in the SQLite DB using SQLModel. The `AgentStateModel` table stores:
- Project name
- JSON-serialized list of states

Having a persistent log of agent states is useful for:
- Providing real-time visibility to the user
- Auditing and debugging agent behavior
- Resuming from interruptions or failures
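
For illustration, a single state record might look like the sketch below. The `agent_is_active`, `completed`, `browser_session`, and `terminal_session` keys appear in `devika.py` later in this commit; the remaining key names are assumptions:

```python
# A sketch of one agent-state record with the fields described above.
from typing import TypedDict

class AgentStateRecord(TypedDict):
    step: str                 # current step or action being executed
    internal_monologue: str   # the agent's current "thoughts"
    browser_session: dict     # URL visited, screenshot path
    terminal_session: dict    # command executed, output
    token_usage: int          # tokens consumed so far
    agent_is_active: bool
    completed: bool

state: AgentStateRecord = {
    "step": "researching",
    "internal_monologue": "Looking up the pygame docs...",
    "browser_session": {"url": "https://www.pygame.org", "screenshot": ""},
    "terminal_session": {"command": "", "output": ""},
    "token_usage": 1024,
    "agent_is_active": True,
    "completed": False,
}
```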

## Services

Devika integrates with external services to augment its capabilities:

- **GitHub**: Performing git operations like clone/pull, listing repos/commits/files etc.
- **Netlify**: Deploying web apps and sites seamlessly

The `GitHub` and `Netlify` classes provide lightweight wrappers around the respective service APIs. They handle authentication, making HTTP requests, and parsing responses.

This allows Devika to perform actions like:
- Cloning a repo given a GitHub URL
- Listing a user's GitHub repos
- Creating a new Netlify site
- Deploying a directory to Netlify
- Providing the deployed site URL to the user

Integrations are done in a modular way so that new services can be added easily.
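
The wrapper pattern itself is simple; here is a hypothetical sketch for Netlify, where the endpoint path and auth scheme are assumptions rather than a verified API reference:

```python
# A sketch of a lightweight service wrapper: auth, HTTP request, parsed response.
import requests

class Netlify:
    BASE_URL = "https://api.netlify.com/api/v1"  # assumed base URL

    def __init__(self, api_token: str) -> None:
        self.headers = {"Authorization": f"Bearer {api_token}"}

    def create_site(self) -> dict:
        resp = requests.post(f"{self.BASE_URL}/sites", headers=self.headers)
        resp.raise_for_status()
        return resp.json()
```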

## Utilities

Devika makes use of several utility modules to support its functioning:

- `Config`: Loads and provides access to configuration settings (API keys, folder paths etc.)
- `Logger`: Sets up logging to console and file, with support for log levels and colors
- `ReadCode`: Recursively reads code files in a directory and converts them into a Markdown format
- `SentenceBERT`: Extracts keywords and semantic information from text using SentenceBERT embeddings
- `Experts`: A collection of domain-specific knowledge bases to assist in certain areas (e.g. webdev, physics, chemistry, math)

The utility modules aim to provide reusable functionality that is used across different parts of the system.

## Conclusion

Devika is a complex system that combines multiple AI and automation techniques to deliver an intelligent programming assistant. Key design principles include:

- Modularity: Breaking down functionality into specialized agents and services
- Flexibility: Supporting different LLMs, services and domains in a pluggable fashion
- Persistence: Storing project and agent state in a DB to enable pause/resume and auditing
- Transparency: Surfacing the agent's thought process and interactions to the user in real time

By understanding how the different components work together, we can extend, optimize and scale Devika to take on increasingly sophisticated software engineering tasks. The agent-based architecture provides a strong foundation to build more advanced AI capabilities in the future.

38
CONTRIBUTING.md
Normal file

@@ -0,0 +1,38 @@

# Welcome Contributors

We welcome contributions to enhance Devika's capabilities and improve its performance. To report bugs, create a [GitHub issue](https://github.com/stitionai/devika/issues).

> Before contributing, read through the existing issues and pull requests to see if someone else is already working on something similar. That way you can avoid duplicating efforts.

To contribute, please follow these steps:

1. Fork the Devika repository on GitHub.
2. Create a new branch for your feature or bug fix.
3. Make your changes and ensure that the code passes all tests.
4. Submit a pull request describing your changes and their benefits.

### Pull Request Guidelines

When submitting a pull request, please follow these guidelines:

1. **Title**: Please include one of the following prefixes:
   - `Feature:` for new features
   - `Fix:` for bug fixes
   - `Docs:` for documentation changes
   - `Refactor:` for code refactoring
   - `Improve:` for performance improvements
   - `Other:` for other changes

   For example:
   - `Feature: added new feature to the code`
   - `Fix: fixed the bug in the code`

2. **Description**: Provide a clear and detailed description of your changes in the pull request. Explain the problem you are solving, the approach you took, and any potential side effects or limitations of your changes.
3. **Documentation**: Update the relevant documentation to reflect your changes. This includes the README file, code comments, and any other relevant documentation.
4. **Dependencies**: If your changes require new dependencies, ensure that they are properly documented and added to the `requirements.txt` or `package.json` files.
5. If a pull request does not meet the above guidelines, it may be closed without merging.

**Note**: Please ensure that you have the latest version of the code before creating a pull request. If you have an existing fork, just sync your fork with the latest version of the Devika repository.

Please adhere to the coding conventions, maintain clear documentation, and provide thorough testing for your contributions.

21
LICENSE
Normal file

@@ -0,0 +1,21 @@

MIT License

Copyright (c) 2024 stition

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

33
Makefile
Normal file

@@ -0,0 +1,33 @@

.PHONY: setup deps compose-up compose-down compose-destroy

# Check whether docker is installed on the machine
DOCKER := $(shell command -v docker)
DOCKER_COMPOSE := $(shell command -v docker-compose)

deps:
ifndef DOCKER
	@echo "Docker is not available. Please install docker"
	@echo "try running sudo apt-get install docker"
	@exit 1
endif
ifndef DOCKER_COMPOSE
	@echo "docker-compose is not available. Please install docker-compose"
	@echo "try running sudo apt-get install docker-compose"
	@exit 1
endif

setup:
	sh +x build

compose-down: deps
	docker volume ls
	docker-compose ps
	docker images
	docker-compose down

compose-up: deps compose-down
	docker-compose up --build

compose-destroy: deps
	docker images | grep -i devika | awk '{print $$3}' | xargs docker rmi -f
	docker volume prune

183
README.md
Normal file

@@ -0,0 +1,183 @@

<p align="center">
  <img src=".assets/devika-avatar.png" alt="Devika Logo" width="250">
</p>

<h1 align="center">🚀 Devika - Agentic AI Software Engineer 👩‍💻</h1>

![devika screenshot](.assets/devika-screenshot.png)

> [!IMPORTANT]
> This project is currently in a very early development/experimental stage. There are a lot of unimplemented/broken features at the moment. Contributions are welcome to help out with the progress!

## Table of Contents

- [About](#about)
- [Demos](#demos)
- [Key Features](#key-features)
- [System Architecture](#system-architecture)
- [Getting Started](#getting-started)
  - [Requirements](#requirements)
  - [Installation](#installation)
  - [How to use](#how-to-use)
- [Configuration](#configuration)
- [Contributing](#contributing)
- [Help and Support](#help-and-support)
- [License](#license)

## About

Devika is an advanced AI software engineer that can understand high-level human instructions, break them down into steps, research relevant information, and write code to achieve the given objective. Devika utilizes large language models, planning and reasoning algorithms, and web browsing abilities to intelligently develop software.

Devika aims to revolutionize the way we build software by providing an AI pair programmer who can take on complex coding tasks with minimal human guidance. Whether you need to create a new feature, fix a bug, or develop an entire project from scratch, Devika is here to assist you.

> [!NOTE]
> Devika is modeled after [Devin](https://www.cognition-labs.com/introducing-devin) by Cognition AI. This project aims to be an open-source alternative to Devin with an "overly ambitious" goal to meet the same score as Devin in the [SWE-bench](https://www.swebench.com/) Benchmarks... and eventually beat it?

## Demos

https://github.com/stitionai/devika/assets/26198477/cfed6945-d53b-4189-9fbe-669690204206

## Key Features

- 🤖 Supports **Claude 3**, **GPT-4**, **Gemini**, **Mistral**, **Groq** and **Local LLMs** via [Ollama](https://ollama.com). For optimal performance, use the **Claude 3** family of models.
- 🧠 Advanced AI planning and reasoning capabilities
- 🔍 Contextual keyword extraction for focused research
- 🌐 Seamless web browsing and information gathering
- 💻 Code writing in multiple programming languages
- 📊 Dynamic agent state tracking and visualization
- 💬 Natural language interaction via chat interface
- 📂 Project-based organization and management
- 🔌 Extensible architecture for adding new features and integrations

## System Architecture

Read [**ARCHITECTURE.md**](docs/architecture/ARCHITECTURE.md) for the detailed documentation.

## Getting Started

### Requirements
```
Version requirements
- Python >= 3.10 and < 3.12
- Node.js >= 18
- bun
```

- Install uv - Python package manager: [download](https://github.com/astral-sh/uv)
- Install bun - JavaScript runtime: [download](https://bun.sh/docs/installation)
- For Ollama, see the [ollama setup guide](docs/Installation/ollama.md) (optional: you can skip this step if you don't want to use local models)
- For API models, configure the API keys via the settings page in the UI.

### Installation

To install Devika, follow these steps:

1. Clone the Devika repository:
   ```bash
   git clone https://github.com/stitionai/devika.git
   ```
2. Navigate to the project directory:
   ```bash
   cd devika
   ```
3. Create a virtual environment and install the required dependencies (you can use any virtual environment manager):
   ```bash
   uv venv

   # On macOS and Linux.
   source .venv/bin/activate

   # On Windows.
   .venv\Scripts\activate

   uv pip install -r requirements.txt
   ```
4. Install Playwright for browsing capabilities:
   ```bash
   playwright install --with-deps # installs browsers in playwright (and their deps) if required
   ```
5. Start the Devika server:
   ```bash
   python devika.py
   ```
6. If everything is working fine, you will see the following output:
   ```bash
   root: INFO : Devika is up and running!
   ```
7. Now, for the frontend, open a new terminal and navigate to the `ui` directory:
   ```bash
   cd ui/
   bun install
   bun run start
   ```
8. Access the Devika web interface by opening a browser and navigating to `http://127.0.0.1:3001`

### How to use

To start using Devika, follow these steps:

1. Open the Devika web interface in your browser.
2. To create a project, click on 'select project' and then click on 'new project'.
3. Select the search engine and model configuration for your project.
4. In the chat interface, provide a high-level objective or task description for Devika to work on.
5. Devika will process your request, break it down into steps, and start working on the task.
6. Monitor Devika's progress, view generated code, and provide additional guidance or feedback as needed.
7. Once Devika completes the task, review the generated code and project files.
8. Iterate and refine the project as desired by providing further instructions or modifications.

## Configuration

Devika requires certain configuration settings and API keys to function properly:

When you run Devika for the first time, it will create a `config.toml` file for you in the root directory. You can configure the following settings on the settings page via the UI:

- API KEYS
  - `BING`: Your Bing Search API key for web searching capabilities.
  - `GOOGLE_SEARCH`: Your Google Search API key for web searching capabilities.
  - `GOOGLE_SEARCH_ENGINE_ID`: Your Google Search Engine ID for web searching using Google.
  - `OPENAI`: Your OpenAI API key for accessing GPT models.
  - `GEMINI`: Your Gemini API key for accessing Gemini models.
  - `CLAUDE`: Your Anthropic API key for accessing Claude models.
  - `MISTRAL`: Your Mistral API key for accessing Mistral models.
  - `GROQ`: Your Groq API key for accessing Groq models.
  - `NETLIFY`: Your Netlify API key for deploying and managing web projects.

- API_ENDPOINTS
  - `BING`: The Bing API endpoint for web searching.
  - `GOOGLE`: The Google API endpoint for web searching.
  - `OLLAMA`: The Ollama API endpoint for accessing local LLMs.
  - `OPENAI`: The OpenAI API endpoint for accessing OpenAI models.

Make sure to keep your API keys secure and do not share them publicly. For setting up the Bing and Google search API keys, follow the instructions in the [search engine setup](docs/Installation/search_engine.md).

## Contributing

We welcome contributions to enhance Devika's capabilities and improve its performance. To contribute, please see the [`CONTRIBUTING.md`](CONTRIBUTING.md) file for steps.

## Help and Support

If you have any questions, feedback, or suggestions, please feel free to reach out to us. You can raise an issue in the [issue tracker](https://github.com/stitionai/devika/issues) or join the [discussions](https://github.com/stitionai/devika/discussions) for general discussions.

We also have a Discord server for the Devika community, where you can connect with other users, share your experiences, ask questions, and collaborate on the project. To join the Devika community Discord server, [click here](https://discord.gg/CYRp43878y).

## License

Devika is released under the [MIT License](https://opensource.org/licenses/MIT). See the `LICENSE` file for more information.

## Star History

<div align="center">
  <a href="https://star-history.com/#stitionai/devika&Date">
    <picture>
      <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=stitionai/devika&type=Date&theme=dark" />
      <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=stitionai/devika&type=Date" />
      <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=stitionai/devika&type=Date" />
    </picture>
  </a>
</div>

---

We hope you find Devika to be a valuable tool in your software development journey. If you have any questions, feedback, or suggestions, please don't hesitate to reach out. Happy coding with Devika!

7
ROADMAP.md
Normal file

@@ -0,0 +1,7 @@

# Roadmap

- [ ] Create an extensive testing suite for all [Agents](https://github.com/stitionai/devika/tree/main/src/agents).
- [ ] Hunt down all runtime errors and prepare for the Project Devika stable release.
- [ ] Document and implement easy cross-platform installation/setup scripts and packages.
- [ ] Create tutorial videos on the installation steps, setup, and usage for Windows, Linux, and macOS.
- [ ] Focusing on the Claude 3 Opus model, test Devika on the [SWE-bench](https://www.swebench.com/) benchmarks.

29
app.dockerfile
Normal file

@@ -0,0 +1,29 @@

FROM debian:12

# set up build variable
ARG VITE_API_BASE_URL
ENV VITE_API_BASE_URL=${VITE_API_BASE_URL}

# set up OS env
USER root
WORKDIR /home/nonroot/client
RUN groupadd -r nonroot && useradd -r -g nonroot -d /home/nonroot/client -s /bin/bash nonroot

# install Node.js
RUN apt-get update && apt-get upgrade -y
RUN apt-get install -y build-essential software-properties-common curl sudo wget git
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -
RUN apt-get install -y nodejs

# copy the devika app client only
COPY ui /home/nonroot/client/ui
COPY src /home/nonroot/client/src
COPY config.toml /home/nonroot/client/

RUN cd ui && npm install && npm install -g npm && npm install -g bun
RUN chown -R nonroot:nonroot /home/nonroot/client

USER nonroot
WORKDIR /home/nonroot/client/ui

ENTRYPOINT [ "npx", "bun", "run", "dev", "--", "--host" ]

0
benchmarks/BENCHMARKS.md
Normal file

1
benchmarks/SWE-bench.md
Normal file

@@ -0,0 +1 @@

> ...Not yet

38
devika.dockerfile
Normal file

@@ -0,0 +1,38 @@

FROM debian:12

# setting up os env
USER root
WORKDIR /home/nonroot/devika
RUN groupadd -r nonroot && useradd -r -g nonroot -d /home/nonroot/devika -s /bin/bash nonroot

ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1

# setting up python3
RUN apt-get update && apt-get upgrade -y
RUN apt-get install -y build-essential software-properties-common curl sudo wget git
RUN apt-get install -y python3 python3-pip
RUN curl -fsSL https://astral.sh/uv/install.sh | sudo -E bash -
RUN $HOME/.cargo/bin/uv venv
ENV PATH="/home/nonroot/devika/.venv/bin:$HOME/.cargo/bin:$PATH"

# copy devika python engine only
RUN $HOME/.cargo/bin/uv venv
COPY requirements.txt /home/nonroot/devika/
RUN UV_HTTP_TIMEOUT=100000 $HOME/.cargo/bin/uv pip install -r requirements.txt

RUN playwright install-deps chromium
RUN playwright install chromium

COPY src /home/nonroot/devika/src
COPY config.toml /home/nonroot/devika/
COPY sample.config.toml /home/nonroot/devika/
COPY devika.py /home/nonroot/devika/
RUN chown -R nonroot:nonroot /home/nonroot/devika

USER nonroot
WORKDIR /home/nonroot/devika
ENV PATH="/home/nonroot/devika/.venv/bin:$HOME/.cargo/bin:$PATH"
RUN mkdir /home/nonroot/devika/db

ENTRYPOINT [ "python3", "-m", "devika" ]

209
devika.py
Normal file

@@ -0,0 +1,209 @@

"""
DO NOT REARRANGE THE ORDER OF THE FUNCTION CALLS AND VARIABLE DECLARATIONS
AS IT MAY CAUSE IMPORT ERRORS AND OTHER ISSUES
"""
from gevent import monkey
monkey.patch_all()
from src.init import init_devika
init_devika()


from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
from src.socket_instance import socketio, emit_agent
import os
import logging
from threading import Thread
import tiktoken

from src.apis.project import project_bp
from src.config import Config
from src.logger import Logger, route_logger
from src.project import ProjectManager
from src.state import AgentState
from src.agents import Agent
from src.llm import LLM


app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": # Change the origin to your frontend URL
    [
        "https://localhost:3000",
        "http://localhost:3000",
    ]}})
app.register_blueprint(project_bp)
socketio.init_app(app)


log = logging.getLogger("werkzeug")
log.disabled = True


TIKTOKEN_ENC = tiktoken.get_encoding("cl100k_base")

os.environ["TOKENIZERS_PARALLELISM"] = "false"

manager = ProjectManager()
AgentState = AgentState()
config = Config()
logger = Logger()


# initial socket
@socketio.on('socket_connect')
def test_connect(data):
    print("Socket connected :: ", data)
    emit_agent("socket_response", {"data": "Server Connected"})


@app.route("/api/data", methods=["GET"])
@route_logger(logger)
def data():
    project = manager.get_project_list()
    models = LLM().list_models()
    search_engines = ["Bing", "Google", "DuckDuckGo"]
    return jsonify({"projects": project, "models": models, "search_engines": search_engines})


@app.route("/api/messages", methods=["POST"])
def get_messages():
    data = request.json
    project_name = data.get("project_name")
    messages = manager.get_messages(project_name)
    return jsonify({"messages": messages})


# Main socket
@socketio.on('user-message')
def handle_message(data):
    logger.info(f"User message: {data}")
    message = data.get('message')
    base_model = data.get('base_model')
    project_name = data.get('project_name')
    search_engine = data.get('search_engine').lower()

    agent = Agent(base_model=base_model, search_engine=search_engine)

    state = AgentState.get_latest_state(project_name)
    if not state:
        thread = Thread(target=lambda: agent.execute(message, project_name))
        thread.start()
    else:
        if AgentState.is_agent_completed(project_name):
            thread = Thread(target=lambda: agent.subsequent_execute(message, project_name))
            thread.start()
        else:
            emit_agent("info", {"type": "warning", "message": "The previous agent hasn't completed its task."})
            last_state = AgentState.get_latest_state(project_name)
            if last_state["agent_is_active"] or not last_state["completed"]:
                thread = Thread(target=lambda: agent.execute(message, project_name))
                thread.start()
            else:
                thread = Thread(target=lambda: agent.subsequent_execute(message, project_name))
                thread.start()


@app.route("/api/is-agent-active", methods=["POST"])
@route_logger(logger)
def is_agent_active():
    data = request.json
    project_name = data.get("project_name")
    is_active = AgentState.is_agent_active(project_name)
    return jsonify({"is_active": is_active})


@app.route("/api/get-agent-state", methods=["POST"])
@route_logger(logger)
def get_agent_state():
    data = request.json
    project_name = data.get("project_name")
    agent_state = AgentState.get_latest_state(project_name)
    return jsonify({"state": agent_state})


@app.route("/api/get-browser-snapshot", methods=["GET"])
@route_logger(logger)
def browser_snapshot():
    snapshot_path = request.args.get("snapshot_path")
    return send_file(snapshot_path, as_attachment=True)


@app.route("/api/get-browser-session", methods=["GET"])
@route_logger(logger)
def get_browser_session():
    project_name = request.args.get("project_name")
    agent_state = AgentState.get_latest_state(project_name)
    if not agent_state:
        return jsonify({"session": None})
    else:
        browser_session = agent_state["browser_session"]
        return jsonify({"session": browser_session})


@app.route("/api/get-terminal-session", methods=["GET"])
@route_logger(logger)
def get_terminal_session():
    project_name = request.args.get("project_name")
    agent_state = AgentState.get_latest_state(project_name)
    if not agent_state:
        return jsonify({"terminal_state": None})
    else:
        terminal_state = agent_state["terminal_session"]
        return jsonify({"terminal_state": terminal_state})


@app.route("/api/run-code", methods=["POST"])
@route_logger(logger)
def run_code():
    data = request.json
    project_name = data.get("project_name")
    code = data.get("code")
    # TODO: Implement code execution logic
    return jsonify({"message": "Code execution started"})


@app.route("/api/calculate-tokens", methods=["POST"])
@route_logger(logger)
def calculate_tokens():
    data = request.json
    prompt = data.get("prompt")
    tokens = len(TIKTOKEN_ENC.encode(prompt))
    return jsonify({"token_usage": tokens})


@app.route("/api/token-usage", methods=["GET"])
@route_logger(logger)
def token_usage():
    project_name = request.args.get("project_name")
    token_count = AgentState.get_latest_token_usage(project_name)
    return jsonify({"token_usage": token_count})


@app.route("/api/logs", methods=["GET"])
def real_time_logs():
    log_file = logger.read_log_file()
    return jsonify({"logs": log_file})


@app.route("/api/settings", methods=["POST"])
@route_logger(logger)
def set_settings():
    data = request.json
    config.update_config(data)
    return jsonify({"message": "Settings updated"})


@app.route("/api/settings", methods=["GET"])
@route_logger(logger)
def get_settings():
    configs = config.get_config()
    return jsonify({"settings": configs})


@app.route("/api/status", methods=["GET"])
@route_logger(logger)
def status():
    return jsonify({"status": "server is running!"})


if __name__ == "__main__":
    logger.info("Devika is up and running!")
    socketio.run(app, debug=False, port=1337, host="0.0.0.0")

61
docker-compose.yaml
Normal file

@@ -0,0 +1,61 @@

version: "3.9"

services:
  ollama-service:
    image: ollama/ollama:latest
    expose:
      - 11434
    ports:
      - 11434:11434
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:11434/ || exit 1"]
      interval: 5s
      timeout: 30s
      retries: 5
      start_period: 30s
    networks:
      - devika-subnetwork

  devika-backend-engine:
    build:
      context: .
      dockerfile: devika.dockerfile
    depends_on:
      - ollama-service
    expose:
      - 1337
    ports:
      - 1337:1337
    environment:
      - OLLAMA_HOST=http://ollama-service:11434
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:1337/ || exit 1"]
      interval: 5s
      timeout: 30s
      retries: 5
      start_period: 30s
    volumes:
      - devika-backend-dbstore:/home/nonroot/devika/db
    networks:
      - devika-subnetwork

  devika-frontend-app:
    build:
      context: .
      dockerfile: app.dockerfile
      args:
        - VITE_API_BASE_URL=http://127.0.0.1:1337
    depends_on:
      - devika-backend-engine
    expose:
      - 3000
    ports:
      - 3000:3000
    networks:
      - devika-subnetwork

networks:
  devika-subnetwork:

volumes:
  devika-backend-dbstore:
BIN
docs/Installation/images/bing-1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 74 KiB |
BIN
docs/Installation/images/bing.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 58 KiB |
BIN
docs/Installation/images/google-2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 101 KiB |
BIN
docs/Installation/images/google.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 226 KiB |

20
docs/Installation/ollama.md
Normal file

@@ -0,0 +1,20 @@

# Ollama Installation Guide

This guide will help you set up Ollama for Devika. Ollama is a tool that allows you to run open-source large language models (LLMs) locally on your machine. It supports a variety of models like Llama 2, Mistral, Code Llama, and many more.

## Installation

1. Go to the [Ollama](https://ollama.com) website.
2. Download the latest version of Ollama.
3. After installing Ollama, download the model you want to use from the [models library](https://ollama.com/library).
4. Select the model you want to download and copy the command, for example `ollama run llama2`. It will download the model and start the server.
5. `ollama list` will show the list of models you have downloaded.
6. If the server isn't running, you can start it manually with `ollama serve`. The default address for the server is `http://localhost:11434`.
7. To change the port and other configurations, follow the FAQ [here](https://github.com/ollama/ollama/blob/main/docs/faq.md).
8. For more information, `ollama [command] --help` will show the help menu. For example, `ollama run --help` will show the help menu for the run command.


## Devika Configuration

- If you serve Ollama on a different address, you can change the port in the `config.toml` file or change it via the UI.
- If you are using the default address, Devika will automatically detect the server and fetch the models list.

33
docs/Installation/search_engine.md
Normal file

@@ -0,0 +1,33 @@

# Search Engine Setup

To use the search engine capabilities of Devika, you need to set up the search engine API keys. Currently, Devika supports the Bing, Google, and DuckDuckGo search engines. If you want to use DuckDuckGo, you don't need to set up any API keys.

For the Bing and Google search engines, you need to set up the API keys. Here's how you can do it:

## Bing Search API
- Create an Azure account. You can create a free account [here](https://azure.microsoft.com/en-us/free/).
- Go to the [Bing Search API](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api) website.
- Click on the `Try now` button.
- Sign in/sign up with your Azure account.
- Create a new resource group (if you don't have any).
![alt text](images/bing.png)
- Click on the `Review and create` button.
- If everything is fine, click on the `Create` button.
- Once the resource is created, go to the `Keys and Endpoint` tab.
![alt text](images/bing-1.png)
- Copy either `Key1` or `Key2` and paste it into the `API_KEYS` field with the name `BING` in the `config.toml` file located in the root directory of Devika, or set it via the UI.
- Copy the `Endpoint` and paste it into the `API_Endpoints` field with the name `BING` in the `config.toml` file located in the root directory of Devika, or set it via the UI.


## Google Search API
- If you don't have one, create a GCP account in the [Google Cloud Console](https://console.cloud.google.com/).
- The official documentation is available [here](https://developers.google.com/custom-search/v1/overview).
- Click on `Get a Key`.
- Select an existing project or create a new project, then click next.
![alt text](images/google.png)
- This enables the Custom Search API for the project and creates the API key.
- Copy the API key and paste it into the `API_KEYS` field with the name `GOOGLE_SEARCH` in the `config.toml` file in the root directory of Devika, or set it via the UI.
- For the search engine ID, go to the [Google Custom Search Engine](https://programmablesearchengine.google.com/controlpanel/all) website.
- Click on the `Add` button.
![alt text](images/google-2.png)
- After creating the engine, copy the `Search Engine ID` and paste it into the `API_Endpoints` field with the name `GOOGLE_SEARCH_ENGINE_ID` in the `config.toml` file in the root directory of Devika, or set it via the UI.
251
docs/architecture/ARCHITECTURE.md
Normal file

@@ -0,0 +1,251 @@

# Devika Architecture

Devika is an advanced AI software engineer that can understand high-level human instructions, break them down into steps, research relevant information, and write code to achieve a given objective. This document provides a detailed technical overview of Devika's system architecture and how the various components work together.

## Table of Contents

1. [Overview](#overview)
2. [Agent Core](#agent-core)
3. [Agents](#agents)
   - [Planner](#planner)
   - [Researcher](#researcher)
   - [Coder](#coder)
   - [Action](#action)
   - [Runner](#runner)
   - [Feature](#feature)
   - [Patcher](#patcher)
   - [Reporter](#reporter)
   - [Decision](#decision)
4. [Language Models](#language-models)
5. [Browser Interaction](#browser-interaction)
6. [Project Management](#project-management)
7. [Agent State Management](#agent-state-management)
8. [Services](#services)
9. [Utilities](#utilities)
10. [Conclusion](#conclusion)

## Overview

At a high level, Devika consists of the following key components:

- **Agent Core**: Orchestrates the overall AI planning, reasoning and execution process. Communicates with various sub-agents.
- **Agents**: Specialized sub-agents that handle specific tasks like planning, research, coding, patching, reporting etc.
- **Language Models**: Leverages large language models (LLMs) like Claude, GPT-4, and GPT-3 for natural language understanding and generation.
- **Browser Interaction**: Enables web browsing, information gathering, and interaction with web elements.
- **Project Management**: Handles organization and persistence of project-related data.
- **Agent State Management**: Tracks and persists the dynamic state of the AI agent across interactions.
- **Services**: Integrations with external services like GitHub and Netlify for enhanced capabilities.
- **Utilities**: Supporting modules for configuration, logging, vector search, PDF generation etc.

Let's dive into each of these components in more detail.

## Agent Core

The `Agent` class serves as the central engine that drives Devika's AI planning and execution loop. Here's how it works:

1. When a user provides a high-level prompt, the `execute` method is invoked on the Agent.
2. The prompt is first passed to the Planner agent to generate a step-by-step plan.
3. The Researcher agent then takes this plan and extracts relevant search queries and context.
4. The Agent performs web searches using the configured search engine (Bing, Google, or DuckDuckGo) and crawls the top results.
5. The raw crawled content is passed through the Formatter agent to extract clean, relevant information.
6. This researched context, along with the step-by-step plan, is fed to the Coder agent to generate code.
7. The generated code is saved to the project directory on disk.
8. If the user interacts further with a follow-up prompt, the `subsequent_execute` method is invoked.
9. The Action agent determines the appropriate action to take based on the user's message (run code, deploy, write tests, add feature, fix bug, write report, etc.).
10. The corresponding specialized agent is invoked to perform the action (Runner, Feature, Patcher, Reporter).
11. Results are communicated back to the user and the project files are updated.

Throughout this process, the Agent Core is responsible for:

- Managing conversation history and project-specific context
- Updating agent state and internal monologue
- Accumulating context keywords across agent prompts
- Emulating the "thinking" process of the AI through timed agent state updates
- Handling special commands through the Decision agent (e.g. git clone, browser interaction session)

A simplified sketch of this first-run loop is shown below.
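The sketch mirrors the names used in `src/agents/agent.py` but trims state updates, user interaction, and error handling, so treat it as a reading aid rather than the exact implementation:

```python
# Simplified sketch of Agent.execute: plan -> research -> search -> code.
def execute(self, prompt: str, project_name: str) -> None:
    plan = self.planner.execute(prompt, project_name)             # steps 1-2
    research = self.researcher.execute(
        plan, self.collected_context_keywords, project_name=project_name
    )                                                             # step 3
    search_results = self.search_queries(research["queries"], project_name)  # steps 4-5
    code = self.coder.execute(
        step_by_step_plan=plan,
        user_context=research["ask_user"],
        search_results=search_results,
        project_name=project_name,
    )                                                             # step 6
    self.coder.save_code_to_project(code, project_name)           # step 7
```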
## Agents

Devika's cognitive abilities are powered by a collection of specialized sub-agents. Each agent is implemented as a separate Python class. Agents communicate with the underlying LLMs through prompt templates defined in Jinja2 format. Key agents include:

### Planner

- Generates a high-level step-by-step plan based on the user's prompt
- Extracts the focus area and provides a summary
- Uses few-shot prompting to provide examples of the expected response format

### Researcher

- Takes the generated plan and extracts relevant search queries
- Ranks and filters queries based on relevance and specificity
- Prompts the user for additional context if required
- Aims to maximize information gain while minimizing the number of searches

### Coder

- Generates code based on the step-by-step plan and researched context
- Segments code into appropriate files and directories
- Includes informative comments and documentation
- Handles a variety of languages and frameworks
- Validates code syntax and style

### Action

- Determines the appropriate action to take based on the user's follow-up prompt
- Maps user intent to a specific action keyword (run, test, deploy, fix, implement, report)
- Provides a human-like confirmation of the action to the user

### Runner

- Executes the written code in a sandboxed environment
- Handles different OS environments (Mac, Linux, Windows)
- Streams command output to the user in real time
- Gracefully handles errors and exceptions

### Feature

- Implements a new feature based on the user's specification
- Modifies existing project files while maintaining code structure and style
- Performs incremental testing to verify the feature works as expected

### Patcher

- Debugs and fixes issues based on the user's description or error message
- Analyzes existing code to identify potential root causes
- Suggests and implements a fix, with an explanation of the changes made

### Reporter

- Generates a comprehensive report summarizing the project
- Includes a high-level overview, technical design, setup instructions, API docs etc.
- Formats the report in a clean, readable structure with a table of contents
- Exports the report as a PDF document

### Decision

- Handles special command-like instructions that don't fit other agents
- Maps commands to specific functions (git clone, browser interaction etc.)
- Executes the corresponding function with the provided arguments

Each agent follows a common pattern:

1. Prepare a prompt by rendering the Jinja2 template with the current context
2. Query the LLM to get a response based on the prompt
3. Validate and parse the LLM's response to extract structured output
4. Perform any additional processing or side-effects (e.g. save to disk)
5. Return the result to the Agent Core for further action

Agents aim to be stateless and idempotent where possible. State and history are managed by the Agent Core and passed into the agents as needed. This allows for a modular, composable design. A minimal sketch of the shared pattern is shown below.
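The class and template names in this sketch are illustrative; only the `LLM` wrapper and the render/inference calls mirror the real code:

```python
# Sketch of the shared agent skeleton: render the Jinja2 template, query the
# LLM, then validate/parse the reply. Retries and side-effects are omitted.
from jinja2 import Environment, BaseLoader

from src.llm import LLM


class ExampleAgent:
    def __init__(self, base_model: str, prompt_template: str):
        self.template = Environment(loader=BaseLoader()).from_string(prompt_template)
        self.llm = LLM(model_id=base_model)

    def execute(self, project_name: str, **context):
        prompt = self.template.render(**context)             # 1. prepare prompt
        response = self.llm.inference(prompt, project_name)  # 2. query the LLM
        return self.parse(response)                          # 3. validate/parse

    def parse(self, response: str):
        return response  # each real agent extracts structured output here
```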
## Language Models

Devika's natural language processing capabilities are driven by state-of-the-art LLMs. The `LLM` class provides a unified interface to interact with different language models:

- **Claude** (Anthropic): Claude models like claude-v1.3, claude-instant-v1.0 etc.
- **GPT-4/GPT-3** (OpenAI): Models like gpt-4, gpt-3.5-turbo etc.
- **Self-hosted models** (via [Ollama](https://ollama.com/)): Allows using open-source models in a self-hosted environment

The `LLM` class abstracts away the specifics of each provider's API, allowing agents to interact with the models in a consistent way. It supports:

- Listing available models
- Generating completions based on a prompt
- Tracking and accumulating token usage over time

Choosing the right model for a given use case depends on factors like desired quality, speed, and cost. The modular design allows swapping out models easily. Typical usage looks like the snippet below.
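The model ID string here is only an example:

```python
# Agents construct the wrapper once, then call inference per prompt.
from src.llm import LLM

llm = LLM(model_id="gpt-4")  # or a Claude or Ollama model ID
reply = llm.inference("Summarize this plan in one paragraph: ...", "my-project")
```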
## Browser Interaction

Devika can interact with webpages in an automated fashion to gather information and perform actions. This is powered by the `Browser` and `Crawler` classes.

The `Browser` class uses Playwright to provide high-level web automation primitives:

- Spawning a browser instance (Chromium)
- Navigating to a URL
- Querying DOM elements
- Extracting page content as text, Markdown, PDF etc.
- Taking a screenshot of the page

The `Crawler` class defines an agent that can interact with a webpage based on natural language instructions. It leverages:

- Pre-defined browser actions like scroll, click, type etc.
- A prompt template that provides examples of how to use these actions
- An LLM to determine the best action to take based on the current page content and objective

The `start_interaction` function sets up a loop where:

1. The current page content and objective are passed to the LLM
2. The LLM returns the next best action to take (e.g. "CLICK 12" or "TYPE 7 machine learning")
3. The Crawler executes this action on the live page
4. The process repeats from the updated page state

This allows performing a sequence of actions to achieve a higher-level objective (e.g. research a topic, fill out a form, interact with an app). A minimal sketch of such a loop is shown below.
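The `crawler` method names in the sketch are hypothetical placeholders, not the real `Crawler` API:

```python
# Illustrative LLM-driven browsing loop: describe the page, ask the LLM for
# the next action, execute it, repeat.
def interaction_loop(llm, crawler, objective: str, project_name: str):
    for _ in range(10):  # bound the number of steps
        page_state = crawler.describe_page()           # numbered elements + text
        prompt = f"Objective: {objective}\nPage: {page_state}\nNext action?"
        action = llm.inference(prompt, project_name)   # e.g. "CLICK 12"
        if action == "DONE":
            break
        crawler.perform(action)                        # act on the live page
```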
## Project Management

The `ProjectManager` class is responsible for creating, updating and querying projects and their associated metadata. Key functions include:

- Creating a new project and initializing its directory structure
- Deleting a project and its associated files
- Adding a message to a project's conversation history
- Retrieving messages for a given project
- Getting the latest user/AI message in a conversation
- Listing all projects
- Zipping a project's files for export

Project metadata is persisted in a SQLite database using SQLModel. The `Projects` table stores:

- Project name
- JSON-serialized conversation history

This allows the agent to work on multiple projects simultaneously and retain conversation history across sessions. A minimal sketch of the table model is shown below.
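The field names in the sketch are illustrative, not the exact schema:

```python
# Sketch of the Projects table as a SQLModel class (SQLModel is in
# requirements.txt); it maps directly onto the SQLite database.
from typing import Optional

from sqlmodel import Field, SQLModel


class Projects(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    project: str              # project name
    message_stack_json: str   # JSON-serialized conversation history
```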
## Agent State Management

As the AI agent works on a task, we need to track and display its internal state to the user. The `AgentState` class handles this by providing an interface to:

- Initialize a new agent state
- Add a state to the current sequence of states for a project
- Update the latest state for a project
- Query the latest state or entire state history for a project
- Mark the agent as active/inactive or the task as completed

Agent state includes information like:

- Current step or action being executed
- Internal monologue reflecting the agent's current "thoughts"
- Browser interactions (URL visited, screenshot)
- Terminal interactions (command executed, output)
- Token usage so far

Like projects, agent states are also persisted in the SQLite DB using SQLModel. The `AgentStateModel` table stores:

- Project name
- JSON-serialized list of states

Having a persistent log of agent states is useful for:

- Providing real-time visibility to the user
- Auditing and debugging agent behavior
- Resuming from interruptions or failures

The sketch below shows the shape of a single state entry.
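The keys shown are the ones the Coder agent writes while emulating code writing; the browser fields are illustrative:

```python
# One entry in a project's state sequence, inferred from how new_state is
# populated in src/agents/coder/coder.py.
state_entry = {
    "internal_monologue": "Writing code...",
    "browser_session": {"url": None, "screenshot": None},  # illustrative fields
    "terminal_session": {
        "title": "Editing main.py",
        "command": "vim main.py",
        "output": 'print("hello")',
    },
}
```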
## Services

Devika integrates with external services to augment its capabilities:

- **GitHub**: Performing git operations like clone/pull, listing repos/commits/files etc.
- **Netlify**: Deploying web apps and sites seamlessly

The `GitHub` and `Netlify` classes provide lightweight wrappers around the respective service APIs. They handle authentication, making HTTP requests, and parsing responses.

This allows Devika to perform actions like:

- Cloning a repo given a GitHub URL
- Listing a user's GitHub repos
- Creating a new Netlify site
- Deploying a directory to Netlify
- Providing the deployed site URL to the user

Integrations are done in a modular way so that new services can be added easily. For example, deployment is a single call, as sketched below.
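This mirrors the call made in the Agent Core's deploy action; the project name is a placeholder:

```python
# Deploy a project directory and surface the resulting URL to the user.
from src.services import Netlify

deploy_metadata = Netlify().deploy("my-project")
deploy_url = deploy_metadata["deploy_url"]
```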
## Utilities

Devika makes use of several utility modules to support its functioning:

- `Config`: Loads and provides access to configuration settings (API keys, folder paths etc.)
- `Logger`: Sets up logging to console and file, with support for log levels and colors
- `ReadCode`: Recursively reads code files in a directory and converts them into a Markdown format
- `SentenceBERT`: Extracts keywords and semantic information from text using SentenceBERT embeddings
- `Experts`: A collection of domain-specific knowledge bases to assist in certain areas (e.g. webdev, physics, chemistry, math)

The utility modules aim to provide reusable functionality that is used across different parts of the system.

## Conclusion

Devika is a complex system that combines multiple AI and automation techniques to deliver an intelligent programming assistant. Key design principles include:

- Modularity: Breaking down functionality into specialized agents and services
- Flexibility: Supporting different LLMs, services and domains in a pluggable fashion
- Persistence: Storing project and agent state in a DB to enable pause/resume and auditing
- Transparency: Surfacing the agent's thought process and interactions to the user in real time

By understanding how the different components work together, we can extend, optimize and scale Devika to take on increasingly sophisticated software engineering tasks. The agent-based architecture provides a strong foundation to build more advanced AI capabilities in the future.
16
docs/architecture/README.md
Normal file

@@ -0,0 +1,16 @@

## System Architecture

Devika's system architecture consists of the following key components:

1. **User Interface**: A web-based chat interface for interacting with Devika, viewing project files, and monitoring the agent's state.
2. **Agent Core**: The central component that orchestrates the AI planning, reasoning, and execution process. It communicates with various sub-agents and modules to accomplish tasks.
3. **Large Language Models**: Devika leverages state-of-the-art language models like **Claude**, **GPT-4**, and **Local LLMs via Ollama** for natural language understanding, generation, and reasoning.
4. **Planning and Reasoning Engine**: Responsible for breaking down high-level objectives into actionable steps and making decisions based on the current context.
5. **Research Module**: Utilizes keyword extraction and web browsing capabilities to gather relevant information for the task at hand.
6. **Code Writing Module**: Generates code based on the plan, research findings, and user requirements. Supports multiple programming languages.
7. **Browser Interaction Module**: Enables Devika to navigate websites, extract information, and interact with web elements as needed.
8. **Knowledge Base**: Stores and retrieves project-specific information, code snippets, and learned knowledge for efficient access.
9. **Database**: Persists project data, agent states, and configuration settings.

Read [ARCHITECTURE.md](ARCHITECTURE.md) for the detailed architecture of Devika.
Read [UNDER_THE_HOOD.md](UNDER_THE_HOOD.md) for the detailed working of Devika.
50
docs/architecture/UNDER_THE_HOOD.md
Normal file

@@ -0,0 +1,50 @@

## Under The Hood

Let's dive deeper into some of the key components and techniques used in Devika:

### AI Planning and Reasoning

Devika employs advanced AI planning and reasoning algorithms to break down high-level objectives into actionable steps. The planning process involves the following stages:

1. **Objective Understanding**: Devika analyzes the given objective or task description to understand the user's intent and requirements.
2. **Context Gathering**: Relevant context is collected from the conversation history, project files, and knowledge base to inform the planning process.
3. **Step Generation**: Based on the objective and context, Devika generates a sequence of high-level steps to accomplish the task.
4. **Refinement and Validation**: The generated steps are refined and validated to ensure their feasibility and alignment with the objective.
5. **Execution**: Devika executes each step in the plan, utilizing various sub-agents and modules as needed.

The reasoning engine constantly evaluates progress and adjusts the plan based on new information or feedback received during execution. The planner's structured output is unpacked by the Agent Core as sketched below.
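The keys match how `src/agents/agent.py` consumes the parsed planner response; the values are placeholders:

```python
# Shape of planner output as unpacked in the Agent Core
# (reply / focus / plans / summary).
planner_response = {
    "reply": "Sure! Here's how I'll build the snake game.",  # shown to the user
    "focus": "pygame snake game",                            # feeds keyword extraction
    "plans": {"1": "Set up the project", "2": "Implement the game loop"},
    "summary": "A pygame-based snake game with a scoreboard.",
}
```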
### Keyword Extraction

To enable focused research and information gathering, Devika employs keyword extraction techniques. The process involves the following steps:

1. **Preprocessing**: The input text (objective, conversation history, or project files) is preprocessed by removing stop words, tokenizing, and normalizing the text.
2. **Keyword Identification**: Devika uses the BERT (Bidirectional Encoder Representations from Transformers) model to identify important keywords and phrases from the preprocessed text. BERT's pre-training on a large corpus allows it to capture semantic relationships and understand the significance of words in the given context.
3. **Keyword Ranking**: The identified keywords are ranked based on their relevance and importance to the task at hand. Techniques like TF-IDF (Term Frequency-Inverse Document Frequency) and TextRank are used to assign scores to each keyword.
4. **Keyword Selection**: The top-ranked keywords are selected as the most relevant and informative for the current context. These keywords are used to guide the research and information gathering process.

By extracting contextually relevant keywords, Devika can focus its research efforts and retrieve pertinent information to assist in task completion. A minimal sketch of this pipeline is shown below.
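The sketch uses KeyBERT (listed in requirements.txt); Devika wraps equivalent logic in its `SentenceBert` helper:

```python
# Extract and rank keywords with a BERT-based model; stop-word removal covers
# the preprocessing step, top_n the selection step.
from keybert import KeyBERT

kw_model = KeyBERT()
keywords = kw_model.extract_keywords(
    "Build a snake game in Python with pygame and a scoreboard",
    keyphrase_ngram_range=(1, 2),  # single words and bigrams
    stop_words="english",
    top_n=5,
)
# -> list of (keyword, relevance score) pairs, highest-scored first
```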
### Browser Interaction

Devika incorporates browser interaction capabilities to navigate websites, extract information, and interact with web elements. The browser interaction module leverages the Playwright library to automate web interactions. The process involves the following steps:

1. **Navigation**: Devika uses Playwright to navigate to specific URLs or perform searches based on the keywords or requirements provided.
2. **Element Interaction**: Playwright allows Devika to interact with web elements such as clicking buttons, filling forms, and extracting text from specific elements.
3. **Page Parsing**: Devika parses the HTML structure of the web pages visited to extract relevant information. It uses techniques like CSS selectors and XPath to locate and extract specific data points.
4. **JavaScript Execution**: Playwright enables Devika to execute JavaScript code within the browser context, allowing for dynamic interactions and data retrieval.
5. **Screenshot Capture**: Devika can capture screenshots of the web pages visited, which can be useful for visual reference or debugging purposes.

The browser interaction module empowers Devika to gather information from the web, interact with online resources, and incorporate real-time data into its decision-making and code generation processes. The sketch below shows the basic Playwright primitives involved.
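This is a synchronous sketch of the navigate/extract/screenshot steps; Devika's own `Browser` class uses Playwright's async API:

```python
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    page.goto("https://example.com")          # 1. navigation
    heading = page.text_content("h1")         # 2-3. element interaction / parsing
    page.screenshot(path="example.png")       # 5. screenshot capture
    browser.close()
```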
### Code Writing

Devika's code writing module generates code based on the plan, research findings, and user requirements. The process involves the following steps:

1. **Language Selection**: Devika identifies the programming language specified by the user or infers it based on the project context.
2. **Code Structure Generation**: Based on the plan and language-specific patterns, Devika generates the high-level structure of the code, including classes, functions, and modules.
3. **Code Population**: Devika fills in the code structure with specific logic, algorithms, and data manipulation statements. It leverages the research findings, code snippets from the knowledge base, and its own understanding of programming concepts to generate meaningful code.
4. **Code Formatting**: The generated code is formatted according to language-specific conventions and best practices to ensure readability and maintainability.
5. **Code Review and Refinement**: Devika reviews the generated code for syntax errors, logical inconsistencies, and potential improvements. It iteratively refines the code based on its own analysis and any feedback provided by the user.

Devika's code writing capabilities enable it to generate functional and efficient code in various programming languages, taking into account the specific requirements and context of each project.
32
requirements.txt
Normal file

@@ -0,0 +1,32 @@

flask
flask-cors
toml
urllib3
requests
colorama
fastlogging
Jinja2
mistletoe
markdownify
pdfminer.six
playwright
pytest-playwright
tiktoken
ollama
openai
anthropic
google-generativeai
sqlmodel
keybert
GitPython
netlify-py
Markdown
xhtml2pdf
mistralai
Flask-SocketIO
eventlet
groq
duckduckgo-search
orjson
gevent
gevent-websocket
31
sample.config.toml
Normal file

@@ -0,0 +1,31 @@

[STORAGE]
SQLITE_DB = "data/db/devika.db"
SCREENSHOTS_DIR = "data/screenshots"
PDFS_DIR = "data/pdfs"
PROJECTS_DIR = "data/projects"
LOGS_DIR = "data/logs"
REPOS_DIR = "data/repos"

[API_KEYS]
BING = "<YOUR_BING_API_KEY>"
GOOGLE_SEARCH = "<YOUR_GOOGLE_SEARCH_API_KEY>"
GOOGLE_SEARCH_ENGINE_ID = "<YOUR_GOOGLE_SEARCH_ENGINE_ID>"
CLAUDE = "<YOUR_CLAUDE_API_KEY>"
OPENAI = "<YOUR_OPENAI_API_KEY>"
GEMINI = "<YOUR_GEMINI_API_KEY>"
MISTRAL = "<YOUR_MISTRAL_API_KEY>"
GROQ = "<YOUR_GROQ_API_KEY>"
NETLIFY = "<YOUR_NETLIFY_API_KEY>"

[API_ENDPOINTS]
BING = "https://api.bing.microsoft.com/v7.0/search"
GOOGLE = "https://www.googleapis.com/customsearch/v1"
OLLAMA = "http://127.0.0.1:11434"
OPENAI = "https://api.openai.com/v1"

[LOGGING]
LOG_REST_API = "true"
LOG_PROMPTS = "false"

[TIMEOUT]
INFERENCE = 60
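For reference, a sketch of how such a file is read (the `toml` package is in requirements.txt; key names match the sample above, while the access pattern is illustrative rather than the `Config` class's exact API):

```python
import toml

config = toml.load("config.toml")
projects_dir = config["STORAGE"]["PROJECTS_DIR"]   # "data/projects"
bing_key = config["API_KEYS"]["BING"]
bing_endpoint = config["API_ENDPOINTS"]["BING"]
```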
7
setup.sh
Normal file

@@ -0,0 +1,7 @@

#!/bin/bash

pip3 install -r requirements.txt
playwright install
python3 -m playwright install-deps
cd ui/
bun install
9
src/agents/__init__.py
Normal file

@@ -0,0 +1,9 @@

from .agent import Agent

from .planner import Planner
from .internal_monologue import InternalMonologue
from .researcher import Researcher
from .formatter import Formatter
from .coder import Coder
from .action import Action
from .runner import Runner
1
src/agents/action/__init__.py
Normal file

@@ -0,0 +1 @@

from .action import Action
41
src/agents/action/action.py
Normal file

@@ -0,0 +1,41 @@

import json

from jinja2 import Environment, BaseLoader

from src.services.utils import retry_wrapper, validate_responses
from src.config import Config
from src.llm import LLM

PROMPT = open("src/agents/action/prompt.jinja2", "r").read().strip()


class Action:
    def __init__(self, base_model: str):
        config = Config()
        self.project_dir = config.get_projects_dir()

        self.llm = LLM(model_id=base_model)

    def render(
        self, conversation: str
    ) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            conversation=conversation
        )

    @validate_responses
    def validate_response(self, response: str):
        # Reject the response if either expected key is missing; using `and`
        # here would let a half-formed response through and raise a KeyError below.
        if "response" not in response or "action" not in response:
            return False
        else:
            return response["response"], response["action"]

    @retry_wrapper
    def execute(self, conversation: list, project_name: str) -> str:
        prompt = self.render(conversation)
        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        return valid_response
31
src/agents/action/prompt.jinja2
Normal file

@@ -0,0 +1,31 @@

You are Devika, an AI Software Engineer. You have been talking to the user and these are your exchanges so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

User's last message: {{ conversation[-1] }}

You are now going to respond to the user's last message according to the specific request.

The user could be asking the following:
- `answer` - Answer a question about the project.
- `run` - Run the project.
- `deploy` - Deploy the project.
- `feature` - Add a new feature to the project.
- `bug` - Fix a bug in the project.
- `report` - Generate a report on the project.

Your response should be in the following format:
```
{
    "response": "Your human-like response to the user's message here describing the action you are taking.",
    "action": "run"
}
```

Only one action can be taken; read the user's last message carefully to determine which one. Sometimes the user's prompt might indicate multiple actions, but you should take only the single most appropriate action and use your response to convey what you are doing.

Any response other than the JSON format will be rejected by the system.
365
src/agents/agent.py
Normal file

@@ -0,0 +1,365 @@

from .planner import Planner
from .researcher import Researcher
from .formatter import Formatter
from .coder import Coder
from .action import Action
from .internal_monologue import InternalMonologue
from .answer import Answer
from .runner import Runner
from .feature import Feature
from .patcher import Patcher
from .reporter import Reporter
from .decision import Decision

from src.project import ProjectManager
from src.state import AgentState
from src.logger import Logger

from src.bert.sentence import SentenceBert
from src.memory import KnowledgeBase
from src.browser.search import BingSearch, GoogleSearch, DuckDuckGoSearch
from src.browser import Browser
from src.browser import start_interaction
from src.filesystem import ReadCode
from src.services import Netlify
from src.documenter.pdf import PDF

import json
import time
import platform
import tiktoken
import asyncio

from src.socket_instance import emit_agent


class Agent:
    def __init__(self, base_model: str, search_engine: str, browser: Browser = None):
        if not base_model:
            raise ValueError("base_model is required")

        self.logger = Logger()

        # kept for agents invoked later (e.g. the browser interaction session)
        self.base_model = base_model

        """
        Accumulate contextual keywords from chained prompts of all preparation agents
        """
        self.collected_context_keywords = []

        """
        Agents
        """
        self.planner = Planner(base_model=base_model)
        self.researcher = Researcher(base_model=base_model)
        self.formatter = Formatter(base_model=base_model)
        self.coder = Coder(base_model=base_model)
        self.action = Action(base_model=base_model)
        self.internal_monologue = InternalMonologue(base_model=base_model)
        self.answer = Answer(base_model=base_model)
        self.runner = Runner(base_model=base_model)
        self.feature = Feature(base_model=base_model)
        self.patcher = Patcher(base_model=base_model)
        self.reporter = Reporter(base_model=base_model)
        self.decision = Decision(base_model=base_model)

        self.project_manager = ProjectManager()
        self.agent_state = AgentState()
        self.engine = search_engine
        self.tokenizer = tiktoken.get_encoding("cl100k_base")

    async def open_page(self, project_name, url):
        browser = await Browser().start()

        await browser.go_to(url)
        _, raw = await browser.screenshot(project_name)
        data = await browser.extract_text()
        await browser.close()

        return browser, raw, data

    def search_queries(self, queries: list, project_name: str) -> dict:
        results = {}

        knowledge_base = KnowledgeBase()

        if self.engine == "bing":
            web_search = BingSearch()
        elif self.engine == "google":
            web_search = GoogleSearch()
        else:
            web_search = DuckDuckGoSearch()

        self.logger.info(f"\nSearch Engine :: {self.engine}")

        for query in queries:
            query = query.strip().lower()

            # knowledge = knowledge_base.get_knowledge(tag=query)
            # if knowledge:
            #     results[query] = knowledge
            #     continue

            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

            web_search.search(query)

            link = web_search.get_first_link()
            print("\nLink :: ", link, '\n')
            if not link:
                continue
            browser, raw, data = loop.run_until_complete(self.open_page(project_name, link))
            emit_agent("screenshot", {"data": raw, "project_name": project_name}, False)
            results[query] = self.formatter.execute(data, project_name)

            self.logger.info(f"got the search results for : {query}")
            # knowledge_base.add_knowledge(tag=query, contents=results[query])
        return results

    def update_contextual_keywords(self, sentence: str):
        """
        Update the context keywords with the latest sentence/prompt
        """
        keywords = SentenceBert(sentence).extract_keywords()
        for keyword in keywords:
            self.collected_context_keywords.append(keyword[0])

        return self.collected_context_keywords

    def make_decision(self, prompt: str, project_name: str) -> str:
        decision = self.decision.execute(prompt, project_name)

        for item in decision:
            function = item["function"]
            args = item["args"]
            reply = item["reply"]

            self.project_manager.add_message_from_devika(project_name, reply)

            if function == "git_clone":
                url = args["url"]
                # Implement git clone functionality here

            elif function == "generate_pdf_document":
                user_prompt = args["user_prompt"]
                # Call the reporter agent to generate the PDF document
                markdown = self.reporter.execute([user_prompt], "", project_name)
                _out_pdf_file = PDF().markdown_to_pdf(markdown, project_name)

                project_name_space_url = project_name.replace(" ", "%20")
                pdf_download_url = "http://127.0.0.1:1337/api/download-project-pdf?project_name={}".format(
                    project_name_space_url)
                response = f"I have generated the PDF document. You can download it from here: {pdf_download_url}"

                # asyncio.run(self.open_page(project_name, pdf_download_url))

                self.project_manager.add_message_from_devika(project_name, response)

            elif function == "browser_interaction":
                user_prompt = args["user_prompt"]
                # Call the interaction agent to interact with the browser
                start_interaction(self.base_model, user_prompt, project_name)

            elif function == "coding_project":
                user_prompt = args["user_prompt"]
                # Call the planner, researcher, coder agents in sequence
                plan = self.planner.execute(user_prompt, project_name)
                planner_response = self.planner.parse_response(plan)

                research = self.researcher.execute(plan, self.collected_context_keywords, project_name)
                search_results = self.search_queries(research["queries"], project_name)

                code = self.coder.execute(
                    step_by_step_plan=plan,
                    user_context=research["ask_user"],
                    search_results=search_results,
                    project_name=project_name
                )
                self.coder.save_code_to_project(code, project_name)

    def subsequent_execute(self, prompt: str, project_name: str):
        """
        Subsequent flow of execution
        """
        new_message = self.project_manager.new_message()
        new_message['message'] = prompt
        new_message['from_devika'] = False
        self.project_manager.add_message_from_user(project_name, new_message['message'])

        os_system = platform.platform()

        self.agent_state.set_agent_active(project_name, True)

        conversation = self.project_manager.get_all_messages_formatted(project_name)
        code_markdown = ReadCode(project_name).code_set_to_markdown()

        response, action = self.action.execute(conversation, project_name)

        self.project_manager.add_message_from_devika(project_name, response)

        print("\naction :: ", action, '\n')

        if action == "answer":
            response = self.answer.execute(
                conversation=conversation,
                code_markdown=code_markdown,
                project_name=project_name
            )
            self.project_manager.add_message_from_devika(project_name, response)

        elif action == "run":
            project_path = self.project_manager.get_project_path(project_name)
            self.runner.execute(
                conversation=conversation,
                code_markdown=code_markdown,
                os_system=os_system,
                project_path=project_path,
                project_name=project_name
            )

        elif action == "deploy":
            deploy_metadata = Netlify().deploy(project_name)
            deploy_url = deploy_metadata["deploy_url"]

            response = {
                "message": "Done! I deployed your project on Netlify.",
                "deploy_url": deploy_url
            }
            response = json.dumps(response, indent=4)

            self.project_manager.add_message_from_devika(project_name, response)

        elif action == "feature":
            code = self.feature.execute(
                conversation=conversation,
                code_markdown=code_markdown,
                system_os=os_system,
                project_name=project_name
            )
            print("\nfeature code :: ", code, '\n')
            self.feature.save_code_to_project(code, project_name)

        elif action == "bug":
            code = self.patcher.execute(
                conversation=conversation,
                code_markdown=code_markdown,
                commands=None,
                error=prompt,
                system_os=os_system,
                project_name=project_name
            )
            print("\nbug code :: ", code, '\n')
            self.patcher.save_code_to_project(code, project_name)

        elif action == "report":
            markdown = self.reporter.execute(conversation, code_markdown, project_name)

            _out_pdf_file = PDF().markdown_to_pdf(markdown, project_name)

            project_name_space_url = project_name.replace(" ", "%20")
            pdf_download_url = "http://127.0.0.1:1337/api/download-project-pdf?project_name={}".format(
                project_name_space_url)
            response = f"I have generated the PDF document. You can download it from here: {pdf_download_url}"

            # asyncio.run(self.open_page(project_name, pdf_download_url))

            self.project_manager.add_message_from_devika(project_name, response)

        self.agent_state.set_agent_active(project_name, False)
        self.agent_state.set_agent_completed(project_name, True)

    def execute(self, prompt: str, project_name: str) -> str:
        """
        Agentic flow of execution
        """
        if project_name:
            self.project_manager.add_message_from_user(project_name, prompt)

        self.agent_state.create_state(project=project_name)

        plan = self.planner.execute(prompt, project_name)
        print("\nplan :: ", plan, '\n')

        planner_response = self.planner.parse_response(plan)
        reply = planner_response["reply"]
        focus = planner_response["focus"]
        plans = planner_response["plans"]
        summary = planner_response["summary"]

        self.project_manager.add_message_from_devika(project_name, reply)
        self.project_manager.add_message_from_devika(project_name, json.dumps(plans, indent=4))
        # self.project_manager.add_message_from_devika(project_name, f"In summary: {summary}")

        self.update_contextual_keywords(focus)
        print("\ncontext_keywords :: ", self.collected_context_keywords, '\n')

        internal_monologue = self.internal_monologue.execute(current_prompt=plan, project_name=project_name)
        print("\ninternal_monologue :: ", internal_monologue, '\n')

        new_state = self.agent_state.new_state()
        new_state["internal_monologue"] = internal_monologue
        self.agent_state.add_to_current_state(project_name, new_state)

        research = self.researcher.execute(plan, self.collected_context_keywords, project_name=project_name)
        print("\nresearch :: ", research, '\n')

        queries = research["queries"]
        # guard against a missing/None query list before joining
        queries_combined = ", ".join(queries) if queries else ""
        ask_user = research["ask_user"]

        if queries or ask_user != "":
            self.project_manager.add_message_from_devika(
                project_name,
                f"I am browsing the web to research the following queries: {queries_combined}."
                f"\n If I need anything, I will make sure to ask you."
            )
        if not queries:
            self.project_manager.add_message_from_devika(
                project_name,
                "I think I can proceed without searching the web."
            )

        ask_user_prompt = "Nothing from the user."

        if ask_user != "" and ask_user is not None:
            self.project_manager.add_message_from_devika(project_name, ask_user)
            self.agent_state.set_agent_active(project_name, False)
            got_user_query = False

            while not got_user_query:
                self.logger.info("Waiting for user query...")

                latest_message_from_user = self.project_manager.get_latest_message_from_user(project_name)
                validate_last_message_is_from_user = self.project_manager.validate_last_message_is_from_user(
                    project_name)

                if latest_message_from_user and validate_last_message_is_from_user:
                    ask_user_prompt = latest_message_from_user["message"]
                    got_user_query = True
                    self.project_manager.add_message_from_devika(project_name, "Thanks! 🙌")
                time.sleep(5)

            self.agent_state.set_agent_active(project_name, True)

        if queries and len(queries) > 0:
            search_results = self.search_queries(queries, project_name)
        else:
            search_results = {}

        code = self.coder.execute(
            step_by_step_plan=plan,
            user_context=ask_user_prompt,
            search_results=search_results,
            project_name=project_name
        )
        print("\ncode :: ", code, '\n')

        self.coder.save_code_to_project(code, project_name)

        self.agent_state.set_agent_active(project_name, False)
        self.agent_state.set_agent_completed(project_name, True)
        self.project_manager.add_message_from_devika(
            project_name,
            "I have completed my task. \n"
            "If you would like me to do anything else, please let me know. \n"
        )
1
src/agents/answer/__init__.py
Normal file

@@ -0,0 +1 @@

from .answer import Answer
42
src/agents/answer/answer.py
Normal file

@@ -0,0 +1,42 @@

import json

from jinja2 import Environment, BaseLoader

from src.services.utils import retry_wrapper, validate_responses
from src.config import Config
from src.llm import LLM

PROMPT = open("src/agents/answer/prompt.jinja2", "r").read().strip()


class Answer:
    def __init__(self, base_model: str):
        config = Config()
        self.project_dir = config.get_projects_dir()

        self.llm = LLM(model_id=base_model)

    def render(
        self, conversation: str, code_markdown: str
    ) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            conversation=conversation,
            code_markdown=code_markdown
        )

    @validate_responses
    def validate_response(self, response: str):
        if "response" not in response:
            return False
        else:
            return response["response"]

    @retry_wrapper
    def execute(self, conversation: list, code_markdown: str, project_name: str) -> str:
        prompt = self.render(conversation, code_markdown)
        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        return valid_response
27
src/agents/answer/prompt.jinja2
Normal file

@@ -0,0 +1,27 @@

You are Devika, an AI Software Engineer. You have been talking to the user and this is your exchange so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

Full Code:
~~~
{{ code_markdown }}
~~~

User's last message: {{ conversation[-1] }}

Your response should be in the following format:
```
{
    "response": "Your human-like response to the user's last message."
}
```

Rules:
- Read the full context, including the code (if any), carefully to answer the user's prompt.
- Your response can be as long as needed, but it should be concise and to the point.

Any response other than the JSON format will be rejected by the system.
1
src/agents/coder/__init__.py
Normal file

@@ -0,0 +1 @@

from .coder import Coder
134
src/agents/coder/coder.py
Normal file

@@ -0,0 +1,134 @@

import os
import time

from jinja2 import Environment, BaseLoader
from typing import List, Dict, Union

from src.config import Config
from src.llm import LLM
from src.state import AgentState
from src.logger import Logger
from src.services.utils import retry_wrapper
from src.socket_instance import emit_agent

PROMPT = open("src/agents/coder/prompt.jinja2", "r").read().strip()


class Coder:
    def __init__(self, base_model: str):
        config = Config()
        self.project_dir = config.get_projects_dir()
        self.logger = Logger()
        self.llm = LLM(model_id=base_model)

    def render(
        self, step_by_step_plan: str, user_context: str, search_results: dict
    ) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            step_by_step_plan=step_by_step_plan,
            user_context=user_context,
            search_results=search_results,
        )

    def validate_response(self, response: str) -> Union[List[Dict[str, str]], bool]:
        response = response.strip()

        self.logger.debug(f"Response from the model: {response}")

        if "~~~" not in response:
            return False

        response = response.split("~~~", 1)[1]
        response = response[:response.rfind("~~~")]
        response = response.strip()

        result = []
        current_file = None
        current_code = []
        code_block = False

        for line in response.split("\n"):
            if line.startswith("File: "):
                if current_file and current_code:
                    result.append({"file": current_file, "code": "\n".join(current_code)})
                # drop the surrounding backticks the prompt format puts around
                # filenames (e.g. "File: `main.py`:"), so paths on disk are clean
                current_file = line.split(":")[1].strip().strip("`")
                current_code = []
                code_block = False
            elif line.startswith("```"):
                code_block = not code_block
            else:
                current_code.append(line)

        if current_file and current_code:
            result.append({"file": current_file, "code": "\n".join(current_code)})

        return result

    def save_code_to_project(self, response: List[Dict[str, str]], project_name: str):
        file_path_dir = None
        project_name = project_name.lower().replace(" ", "-")

        for file in response:
            file_path = os.path.join(self.project_dir, project_name, file['file'])
            file_path_dir = os.path.dirname(file_path)
            os.makedirs(file_path_dir, exist_ok=True)

            with open(file_path, "w", encoding="utf-8") as f:
                f.write(file["code"])

        return file_path_dir

    def get_project_path(self, project_name: str):
        project_name = project_name.lower().replace(" ", "-")
        return f"{self.project_dir}/{project_name}"

    def response_to_markdown_prompt(self, response: List[Dict[str, str]]) -> str:
        response = "\n".join([f"File: `{file['file']}`:\n```\n{file['code']}\n```" for file in response])
        return f"~~~\n{response}\n~~~"

    def emulate_code_writing(self, code_set: list, project_name: str):
        files = []
        for current_file in code_set:
            file = current_file["file"]
            code = current_file["code"]

            current_state = AgentState().get_latest_state(project_name)
            new_state = AgentState().new_state()
            new_state["browser_session"] = current_state["browser_session"]  # keep the browser session
            new_state["internal_monologue"] = "Writing code..."
            new_state["terminal_session"]["title"] = f"Editing {file}"
            new_state["terminal_session"]["command"] = f"vim {file}"
            new_state["terminal_session"]["output"] = code
            files.append({
                "file": file,
                "code": code
            })
            AgentState().add_to_current_state(project_name, new_state)
            time.sleep(2)
        emit_agent("code", {
            "files": files,
            "from": "coder"
        })

    @retry_wrapper
    def execute(
        self,
        step_by_step_plan: str,
        user_context: str,
        search_results: dict,
        project_name: str
    ) -> str:
        prompt = self.render(step_by_step_plan, user_context, search_results)
        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        if not valid_response:
            return False

        print(valid_response)

        self.emulate_code_writing(valid_response, project_name)

        return valid_response
68
src/agents/coder/prompt.jinja2
Normal file

@@ -0,0 +1,68 @@

Project Step-by-step Plan:
```
{{ step_by_step_plan }}
```

Context From User:
```
{{ user_context }}
```

Context From Knowledge Base:

{% if not search_results %}
No context found.
{% else %}
{% for query, result in search_results.items() %}
Query: {{ query }}
Result:
```
{{ result }}
```
---
{% endfor %}
{% endif %}

Read the step-by-step plan carefully. Think step-by-step. Learn relevant information from the knowledge base context. Then write the code to implement the step-by-step plan.

Your response should only be in the following Markdown format:

~~~
File: `main.py`:
```py
print("Example")
```

File: `src/main.rs`:
```rs
fn main() {
    println!("Example");
}
```

File: `nested/directory/example/code.py`:
```py
print("Example")
```

File: `README.md`:
```md
# Example

This is an example.
```
~~~

Rules:
- You should write clean and documented code.
- The code should work on the first try without any errors or bugs.
- Choose the library or dependency you know best.
- The example code in the knowledge base might use something other than what the step-by-step plan calls for. Follow the step-by-step plan, not the example code, for specific values.
- The extension used for the Markdown code blocks should be accurate.
- Nested directories should be specified in the Markdown filename; the code structure should be accurate. If the code needs to be nested to work, then it should be nested.
- You need to include the files required for the code to run, such as requirements.txt, Cargo.toml, etc.
- Files like Cargo.toml are mandatory to include; the project won't work without them.

Any response other than this format will be rejected. You should not refuse to complete the task; you should try your absolute best, and if there's any implementation detail that's impossible to complete, you should write a comment in the code explaining why it's impossible to complete. Refusal is only a last resort; it should never happen.

Your response should start with "~~~" and end with "~~~" just like the example format provided. Never provide any explanation or context inside the response, only the filenames and the code in the format provided. Do not leave any "Note".
src/agents/decision/__init__.py
Normal file
1
src/agents/decision/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
from .decision import Decision
|
34
src/agents/decision/decision.py
Normal file

@@ -0,0 +1,34 @@

import json

from jinja2 import Environment, BaseLoader

from src.services.utils import retry_wrapper, validate_responses
from src.llm import LLM

PROMPT = open("src/agents/decision/prompt.jinja2").read().strip()


class Decision:
    def __init__(self, base_model: str):
        self.llm = LLM(model_id=base_model)

    def render(self, prompt: str) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(prompt=prompt)

    @validate_responses
    def validate_response(self, response: str):
        for item in response:
            if "function" not in item or "args" not in item or "reply" not in item:
                return False

        return response

    @retry_wrapper
    def execute(self, prompt: str, project_name: str) -> str:
        rendered_prompt = self.render(prompt)
        response = self.llm.inference(rendered_prompt, project_name)

        valid_response = self.validate_response(response)

        return valid_response
85
src/agents/decision/prompt.jinja2
Normal file

@@ -0,0 +1,85 @@

You are Devika, an AI software engineer. You are given the following prompt from the user:

```
{{ prompt }}
```

From this prompt, you have to chain function calls from the following options that can accomplish the user's request in the most optimal way.

JSON Functions:

## `git_clone`:
Description: The user's request includes a GitHub URL, and you have to clone the repository to the user's local machine.
Usage:
```
{
    "function": "git_clone",
    "args": {
        "url": "<GitHub URL from the user>"
    },
    "reply": "<Inform the user what you're doing here in a human-like response>"
}
```

## `generate_pdf_document`:
Description: The user's request is to create a document for the following: Report, Documentation, Project Technical Document, Workshop Material, Homework, Assignment, or any other document.
Usage:
```
{
    "function": "generate_pdf_document",
    "args": {
        "user_prompt": "<Write the user's prompt but even more verbose and detailed>"
    },
    "reply": "<Inform the user what you're doing here in a human-like response>"
}
```

## `browser_interaction`:
Description: The user's request is to interact with a website. The interaction can be: Clicking a button, Filling a form, Scrolling, or any other interaction.
The user might be asking you to post something on Twitter or Reddit or even searching something on Google.
Usage:
```
{
    "function": "browser_interaction",
    "args": {
        "user_prompt": "<Write the user's prompt but even more verbose and detailed>"
    },
    "reply": "<Inform the user what you're doing here in a human-like response>"
}
```

## `coding_project`:
Description: The user's request is to create a coding project. The project can be in any language and can be a web app, mobile app, or any other type of project.
Usage:
```
{
    "function": "coding_project",
    "args": {
        "user_prompt": "<Write the user's prompt but even more verbose and detailed>"
    },
    "reply": "<Inform the user what you're doing here in a human-like response>"
}
```

Response Format:

```
[
    {
        "function": "git_clone",
        "args": {
            "url": "https://github.com/username/repo"
        },
        "reply": "<Inform the user what you're doing here in a human-like response>"
    },
    {
        "function": "generate_pdf_document",
        "args": {
            "user_prompt": "I want to create a report on the project"
        },
        "reply": "<Inform the user what you're doing here in a human-like response>"
    }
]
```

Your response should only be the JSON array of function calls with their arguments and nothing else. Any other format of response will be rejected by the system.
1
src/agents/feature/__init__.py
Normal file
@@ -0,0 +1 @@
from .feature import Feature
128
src/agents/feature/feature.py
Normal file
@@ -0,0 +1,128 @@
import os
import time

from jinja2 import Environment, BaseLoader
from typing import List, Dict, Union

from src.config import Config
from src.llm import LLM
from src.state import AgentState
from src.services.utils import retry_wrapper
from src.socket_instance import emit_agent

PROMPT = open("src/agents/feature/prompt.jinja2", "r").read().strip()


class Feature:
    def __init__(self, base_model: str):
        config = Config()
        self.project_dir = config.get_projects_dir()

        self.llm = LLM(model_id=base_model)

    def render(
        self,
        conversation: list,
        code_markdown: str,
        system_os: str
    ) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            conversation=conversation,
            code_markdown=code_markdown,
            system_os=system_os
        )

    def validate_response(self, response: str) -> Union[List[Dict[str, str]], bool]:
        response = response.strip()

        # Guard against responses missing the expected "~~~" fences,
        # which would otherwise raise an IndexError below.
        if "~~~" not in response:
            return False

        response = response.split("~~~", 1)[1]
        response = response[:response.rfind("~~~")]
        response = response.strip()

        result = []
        current_file = None
        current_code = []
        code_block = False

        for line in response.split("\n"):
            if line.startswith("File: "):
                if current_file and current_code:
                    result.append({"file": current_file, "code": "\n".join(current_code)})
                current_file = line.split("`")[1].strip()
                current_code = []
                code_block = False
            elif line.startswith("```"):
                code_block = not code_block
            else:
                current_code.append(line)

        if current_file and current_code:
            result.append({"file": current_file, "code": "\n".join(current_code)})

        return result

    def save_code_to_project(self, response: List[Dict[str, str]], project_name: str):
        file_path_dir = None
        project_name = project_name.lower().replace(" ", "-")

        for file in response:
            file_path = os.path.join(self.project_dir, project_name, file['file'])
            file_path_dir = os.path.dirname(file_path)
            os.makedirs(file_path_dir, exist_ok=True)

            with open(file_path, "w", encoding="utf-8") as f:
                f.write(file["code"])

        return file_path_dir

    def get_project_path(self, project_name: str):
        project_name = project_name.lower().replace(" ", "-")
        return f"{self.project_dir}/{project_name}"

    def response_to_markdown_prompt(self, response: List[Dict[str, str]]) -> str:
        response = "\n".join([f"File: `{file['file']}`:\n```\n{file['code']}\n```" for file in response])
        return f"~~~\n{response}\n~~~"

    def emulate_code_writing(self, code_set: list, project_name: str):
        files = []
        for file in code_set:
            filename = file["file"]
            code = file["code"]

            new_state = AgentState().new_state()
            new_state["internal_monologue"] = "Writing code..."
            new_state["terminal_session"]["title"] = f"Editing {filename}"
            new_state["terminal_session"]["command"] = f"vim {filename}"
            new_state["terminal_session"]["output"] = code
            files.append({
                "file": filename,
                "code": code,
            })
            AgentState().add_to_current_state(project_name, new_state)
            time.sleep(1)
        emit_agent("code", {
            "files": files,
            "from": "feature"
        })

    @retry_wrapper
    def execute(
        self,
        conversation: list,
        code_markdown: str,
        system_os: str,
        project_name: str
    ) -> str:
        prompt = self.render(conversation, code_markdown, system_os)
        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        if not valid_response:
            return False

        self.emulate_code_writing(valid_response, project_name)

        return valid_response
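A sketch of the `File:` / fenced-code wire format that `Feature.validate_response` parses. The response string below is hand-written for illustration (no LLM call involved), and the model id is a placeholder that assumes a working `Config`:

```python
# Illustrative only: a hand-written example of the "File: ... / fenced code"
# format that Feature.validate_response expects from the model.
sample = """~~~
File: `main.py`:
```py
print("Example")
```
~~~"""

from src.agents.feature import Feature

# Reusing just the parsing logic; "gpt-4" is a placeholder model id.
parsed = Feature(base_model="gpt-4").validate_response(sample)
# -> [{"file": "main.py", "code": 'print("Example")'}]
print(parsed)
```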
57
src/agents/feature/prompt.jinja2
Normal file
@@ -0,0 +1,57 @@
You are Devika, an AI Software Engineer. You have been talking to the user and this is the exchange so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

Full Code:
~~~
{{ code_markdown }}
~~~

User wants the following feature to be implemented: {{ conversation[-1] }}

System Operating System: {{ system_os }}

Read the user's feature request carefully. Think step-by-step.

Rules:
- You should write clean and documented code.
- The code should work on the first try without any errors or bugs.
- Choose the library or dependency you know best.
- The extension used for the Markdown code blocks should be accurate.
- You should respond with the complete rewritten code with no implementation detail left out. No brevity allowed; the user needs to be able to copy-paste your response as a whole.

Your response should only be in the following Markdown format:

~~~
File: `main.py`:
```py
print("Example")
```

File: `src/example.rs`:
```rs
fn example() {
    println!("Example");
}
```

File: `nested/directory/example/code.py`:
```py
print("Example")
```

File: `README.md`:
```md
# Example

This is an example.
```
~~~

Any response other than this format will be rejected. You should not refuse to complete the task; you should try your absolute best, and if there's any implementation detail that's impossible to complete, you should write a comment in the code explaining why. Refusal is only a last resort; it should never happen.

Your response should start with "~~~" and end with "~~~" just like the example format provided. Never provide any explanation or context inside the response, only the filenames and the code in the format provided. Do not leave any "Note".
1
src/agents/formatter/__init__.py
Normal file
@@ -0,0 +1 @@
from .formatter import Formatter
22
src/agents/formatter/formatter.py
Normal file
@@ -0,0 +1,22 @@
from jinja2 import Environment, BaseLoader

from src.llm import LLM

PROMPT = open("src/agents/formatter/prompt.jinja2").read().strip()


class Formatter:
    def __init__(self, base_model: str):
        self.llm = LLM(model_id=base_model)

    def render(self, raw_text: str) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(raw_text=raw_text)

    def validate_response(self, response: str) -> bool:
        return True

    def execute(self, raw_text: str, project_name: str) -> str:
        raw_text = self.render(raw_text)
        response = self.llm.inference(raw_text, project_name)
        return response
13
src/agents/formatter/prompt.jinja2
Normal file
@@ -0,0 +1,13 @@
```
{{ raw_text }}
```

You are provided with raw text extracted from a PDF render of a web page. This web page could be a blog, documentation, or any other type of web page.

Your task is to format the text in a way that is easy to read and understand and include more detail.

You are essentially a raw-text-to-clean-Markdown converter. You should remove any unnecessary text, such as navigation links or webpage headers and footers, which we do not need.

If it's documentation with code, focus more on the code examples and the explanation of the code, and keep your responses short to save context window.

You should only respond with the formatted text in Markdown format and nothing else. Start your response with "```" and end with "```".
1
src/agents/internal_monologue/__init__.py
Normal file
@@ -0,0 +1 @@
from .internal_monologue import InternalMonologue
34
src/agents/internal_monologue/internal_monologue.py
Normal file
@@ -0,0 +1,34 @@
import json

from jinja2 import Environment, BaseLoader

from src.llm import LLM
from src.services.utils import retry_wrapper, validate_responses

PROMPT = open("src/agents/internal_monologue/prompt.jinja2").read().strip()


class InternalMonologue:
    def __init__(self, base_model: str):
        self.llm = LLM(model_id=base_model)

    def render(self, current_prompt: str) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(current_prompt=current_prompt)

    @validate_responses
    def validate_response(self, response: str):
        print('-------------------> ', response)
        print("####", type(response))
        if "internal_monologue" not in response:
            return False
        else:
            return response["internal_monologue"]

    @retry_wrapper
    def execute(self, current_prompt: str, project_name: str) -> str:
        rendered_prompt = self.render(current_prompt)
        response = self.llm.inference(rendered_prompt, project_name)
        valid_response = self.validate_response(response)
        return valid_response
21
src/agents/internal_monologue/prompt.jinja2
Normal file
@@ -0,0 +1,21 @@
You are Devika, an AI Software Engineer.

One of your AI agent modules is currently working through the following prompt:

```
{{ current_prompt }}
```

To show the user what you're thinking about or doing, respond with a short human-like response verbalizing your internal monologue.

Your response should be in the following JSON format:

```
{
  "internal_monologue": "<YOUR INTERNAL MONOLOGUE>"
}
```

TIP: Make the internal monologue very human-like and conversational. It should be very short and concise.

Only the provided JSON response format is accepted. Any other response format will be rejected.
1
src/agents/patcher/__init__.py
Normal file
@@ -0,0 +1 @@
from .patcher import Patcher
138
src/agents/patcher/patcher.py
Normal file
@@ -0,0 +1,138 @@
import os
import time

from jinja2 import Environment, BaseLoader
from typing import List, Dict, Union
from src.socket_instance import emit_agent

from src.config import Config
from src.llm import LLM
from src.state import AgentState
from src.services.utils import retry_wrapper

PROMPT = open("src/agents/patcher/prompt.jinja2", "r").read().strip()


class Patcher:
    def __init__(self, base_model: str):
        config = Config()
        self.project_dir = config.get_projects_dir()

        self.llm = LLM(model_id=base_model)

    def render(
        self,
        conversation: list,
        code_markdown: str,
        commands: list,
        error: str,
        system_os: str
    ) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            conversation=conversation,
            code_markdown=code_markdown,
            commands=commands,
            error=error,
            system_os=system_os
        )

    def validate_response(self, response: str) -> Union[List[Dict[str, str]], bool]:
        response = response.strip()

        # Guard against responses missing the expected "~~~" fences,
        # which would otherwise raise an IndexError below.
        if "~~~" not in response:
            return False

        response = response.split("~~~", 1)[1]
        response = response[:response.rfind("~~~")]
        response = response.strip()

        result = []
        current_file = None
        current_code = []
        code_block = False

        for line in response.split("\n"):
            if line.startswith("File: "):
                if current_file and current_code:
                    result.append({"file": current_file, "code": "\n".join(current_code)})
                current_file = line.split("`")[1].strip()
                current_code = []
                code_block = False
            elif line.startswith("```"):
                code_block = not code_block
            else:
                current_code.append(line)

        if current_file and current_code:
            result.append({"file": current_file, "code": "\n".join(current_code)})

        return result

    def save_code_to_project(self, response: List[Dict[str, str]], project_name: str):
        file_path_dir = None
        project_name = project_name.lower().replace(" ", "-")

        for file in response:
            file_path = os.path.join(self.project_dir, project_name, file['file'])
            file_path_dir = os.path.dirname(file_path)
            os.makedirs(file_path_dir, exist_ok=True)

            with open(file_path, "w", encoding="utf-8") as f:
                f.write(file["code"])

        return file_path_dir

    def get_project_path(self, project_name: str):
        project_name = project_name.lower().replace(" ", "-")
        return f"{self.project_dir}/{project_name}"

    def response_to_markdown_prompt(self, response: List[Dict[str, str]]) -> str:
        response = "\n".join([f"File: `{file['file']}`:\n```\n{file['code']}\n```" for file in response])
        return f"~~~\n{response}\n~~~"

    def emulate_code_writing(self, code_set: list, project_name: str):
        files = []
        for current_file in code_set:
            file = current_file["file"]
            code = current_file["code"]

            new_state = AgentState().new_state()
            new_state["internal_monologue"] = "Writing code..."
            new_state["terminal_session"]["title"] = f"Editing {file}"
            new_state["terminal_session"]["command"] = f"vim {file}"
            new_state["terminal_session"]["output"] = code
            files.append({
                "file": file,
                "code": code
            })
            AgentState().add_to_current_state(project_name, new_state)
            time.sleep(1)
        emit_agent("code", {
            "files": files,
            "from": "patcher"
        })

    @retry_wrapper
    def execute(
        self,
        conversation: list,
        code_markdown: str,
        commands: list,
        error: str,
        system_os: str,
        project_name: str
    ) -> str:
        prompt = self.render(
            conversation,
            code_markdown,
            commands,
            error,
            system_os
        )
        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        if not valid_response:
            return False

        self.emulate_code_writing(valid_response, project_name)

        return valid_response
72
src/agents/patcher/prompt.jinja2
Normal file
@@ -0,0 +1,72 @@
You are Devika, an AI Software Engineer. You have been talking to the user and this is the exchange so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

Full Code:
~~~
{{ code_markdown }}
~~~

{% if commands %}
You tried to execute the following commands to run this project:
```
{% for command in commands %}
$ {{ command }}
{% endfor %}
```
{% endif %}

{% if error %}
But it resulted in the following error:
```
$ {{ commands[-1] }}
{{ error }}
```
{% endif %}

System Operating System: {{ system_os }}

Read the encountered bug carefully and reason with the code to identify the problem. Think step-by-step.

Rules:
- You should write clean and documented code.
- The code should work on the first try without any errors or bugs.
- Choose the library or dependency you know best.
- The extension used for the Markdown code blocks should be accurate.
- You should respond with the complete rewritten code with no implementation detail left out. No brevity allowed; the user needs to be able to copy-paste your response as a whole.

Your response should only be in the following Markdown format:

~~~
File: `main.py`:
```py
print("Example")
```

File: `src/example.rs`:
```rs
fn example() {
    println!("Example");
}
```

File: `nested/directory/example/code.py`:
```py
print("Example")
```

File: `README.md`:
```md
# Example

This is an example.
```
~~~

Any response other than this format will be rejected. You should not refuse to complete the task; you should try your absolute best, and if there's any implementation detail that's impossible to complete, you should write a comment in the code explaining why. Refusal is only a last resort; it should never happen.

Your response should start with "~~~" and end with "~~~" just like the example format provided. Never provide any explanation or context inside the response, only the filenames and the code in the format provided. Do not leave any "Note".
1
src/agents/planner/__init__.py
Normal file
@@ -0,0 +1 @@
from .planner import Planner
71
src/agents/planner/planner.py
Normal file
@@ -0,0 +1,71 @@
from jinja2 import Environment, BaseLoader

from src.llm import LLM

PROMPT = open("src/agents/planner/prompt.jinja2").read().strip()


class Planner:
    def __init__(self, base_model: str):
        self.llm = LLM(model_id=base_model)

    def render(self, prompt: str) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(prompt=prompt)

    def validate_response(self, response: str) -> bool:
        return True

    def parse_response(self, response: str):
        result = {
            "project": "",
            "reply": "",
            "focus": "",
            "plans": {},
            "summary": ""
        }

        current_section = None
        current_step = None

        for line in response.split("\n"):
            line = line.strip()

            if line.startswith("Project Name:"):
                current_section = "project"
                result["project"] = line.split(":", 1)[1].strip()
            elif line.startswith("Your Reply to the Human Prompter:"):
                current_section = "reply"
                result["reply"] = line.split(":", 1)[1].strip()
            elif line.startswith("Current Focus:"):
                current_section = "focus"
                result["focus"] = line.split(":", 1)[1].strip()
            elif line.startswith("Plan:"):
                current_section = "plans"
            elif line.startswith("Summary:"):
                current_section = "summary"
                result["summary"] = line.split(":", 1)[1].strip()
            elif current_section == "reply":
                result["reply"] += " " + line
            elif current_section == "focus":
                result["focus"] += " " + line
            elif current_section == "plans":
                if line.startswith("- [ ] Step"):
                    current_step = line.split(":")[0].strip().split(" ")[-1]
                    result["plans"][int(current_step)] = line.split(":", 1)[1].strip()
                elif current_step:
                    result["plans"][int(current_step)] += " " + line
            elif current_section == "summary":
                result["summary"] += " " + line.replace("```", "")

        result["project"] = result["project"].strip()
        result["reply"] = result["reply"].strip()
        result["focus"] = result["focus"].strip()
        result["summary"] = result["summary"].strip()

        return result

    def execute(self, prompt: str, project_name: str) -> str:
        prompt = self.render(prompt)
        response = self.llm.inference(prompt, project_name)
        return response
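A sketch of the line-oriented parser above, fed a hand-written plan in the format the planner prompt requests. The plan text and model id are illustrative placeholders:

```python
# Hand-written sample in the format the planner prompt requests; illustrative only.
plan_text = """Project Name: Example Scraper

Your Reply to the Human Prompter: I'm drafting a plan for the scraper now.

Current Focus: Build a minimal web scraper.

Plan:
- [ ] Step 1: Research the target site's structure.
- [ ] Step 2: Write the scraping script.

Summary: A small two-step plan with no external dependencies."""

from src.agents.planner import Planner

# parse_response needs no LLM call; "gpt-4" is a placeholder model id.
parsed = Planner(base_model="gpt-4").parse_response(plan_text)
# parsed["plans"] maps step numbers to descriptions, e.g. {1: "Research ...", 2: "Write ..."}
print(parsed["project"], parsed["plans"])
```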
36
src/agents/planner/prompt.jinja2
Normal file
@@ -0,0 +1,36 @@
You are Devika, an AI Software Engineer.

The user asked: {{ prompt }}

Based on the user's request, create a step-by-step plan to accomplish the task.

Follow this format for your response:

```
Project Name: <Write an apt project name with no longer than 5 words>

Your Reply to the Human Prompter: <short human-like response to the prompt stating how you are creating the plan, do not start with "As an AI".>

Current Focus: Briefly state the main objective or focus area for the plan.

Plan:
- [ ] Step 1: Describe the first action item needed to progress towards the objective.
- [ ] Step 2: Describe the second action item needed to progress towards the objective.
...
- [ ] Step N: Describe the final action item needed to complete the objective.

Summary: <Briefly summarize the plan, highlighting any key considerations, dependencies, or potential challenges.>
```

Each step should be a clear, concise description of a specific task or action required. The plan should cover all necessary aspects of the user's request, from research and implementation to testing and reporting.

Write the plan knowing that you have access to the browser and search engine to accomplish the task.

After listing the steps, provide a brief summary of the plan, highlighting any key considerations, dependencies, or potential challenges.

Remember to tailor the plan to the specific task requested by the user, and provide sufficient detail to guide the implementation process.

If the task is simple and you think you can do it without other assistance, just give one or two simple steps to accomplish it.
Don't overcomplicate the plan if it's not necessary.

Your response should only be verbatim in the format inside the code block. Any other response format will be rejected.
1
src/agents/reporter/__init__.py
Normal file
@@ -0,0 +1 @@
from .reporter import Reporter
38
src/agents/reporter/prompt.jinja2
Normal file
@@ -0,0 +1,38 @@
You are Devika, an AI Software Engineer. You have been talking to the user and this is the exchange so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

{% if code_markdown %}
Full Code:
~~~
{{ code_markdown }}
~~~
{% endif %}

User's last message or request: {{ conversation[-1] }}

Your task is to generate an extensive report from all the context in this prompt. The report should be detailed and cover all the necessary information.

The report should be lengthy and detailed. It should be at least 3000 characters long.

Your response should be clean Markdown. The system will automatically convert this Markdown to PDF.

Response format:
```
# Title

...Some text...

# Table of Contents

- [Section 1](#section-1)
- [Section 2](#section-2)

Your detailed report here. Necessary sections will follow below.
```

Any response other than the Markdown format will be rejected by the system. Do not include the "```" at the beginning and end of your response. Just the raw, complete Markdown report.
42
src/agents/reporter/reporter.py
Normal file
@@ -0,0 +1,42 @@
import json

from jinja2 import Environment, BaseLoader

from src.services.utils import retry_wrapper
from src.llm import LLM

PROMPT = open("src/agents/reporter/prompt.jinja2").read().strip()


class Reporter:
    def __init__(self, base_model: str):
        self.llm = LLM(model_id=base_model)

    def render(self, conversation: list, code_markdown: str) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            conversation=conversation,
            code_markdown=code_markdown
        )

    def validate_response(self, response: str):
        response = response.strip().replace("```md", "```")

        if response.startswith("```") and response.endswith("```"):
            response = response[3:-3].strip()

        return response

    @retry_wrapper
    def execute(self,
                conversation: list,
                code_markdown: str,
                project_name: str
                ) -> str:
        prompt = self.render(conversation, code_markdown)
        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        return valid_response
1
src/agents/researcher/__init__.py
Normal file
@@ -0,0 +1 @@
from .researcher import Researcher
40
src/agents/researcher/prompt.jinja2
Normal file
@@ -0,0 +1,40 @@
For the provided step-by-step plan, write all the necessary search queries to gather information from the web that the base model doesn't already know.

Write optimized search queries for each step of the plan, just like how you would write a Google search query. Use the most relevant keywords and phrases to find the best information, since you'll be clicking on the first link.

Also, only ask for information if you think it's necessary; otherwise leave the "ask_user" field empty.

Step-by-Step Plan:
{{ step_by_step_plan }}

Only respond in the following JSON format:

```
{
  "queries": ["<QUERY 1>", "<QUERY 2>", "<QUERY 3>", ... ],
  "ask_user": "<ASK INPUT FROM USER IF REQUIRED, OTHERWISE LEAVE EMPTY STRING>"
}
```

Example =>
```
{
  "queries": ["How to do Bing Search via API in Python", "Claude API Documentation Python"],
  "ask_user": "Can you please provide API Keys for Claude, OpenAI, and Firebase?"
}
```

Keywords for Search Query: {{ contextual_keywords }}

Rules:
- Only search for a maximum of 3 queries.
- Do not search for anything that you already know (in your training data, in the base model). For example: you already know how to write a Python Flask web server, it is in your data, so you shouldn't search for how to do that.
- Do not search for information that is not relevant to the task at hand.
- Try to include contextual keywords in your search queries, adding relevant keywords and phrases to make the search queries as specific as possible.
- Only search for documentation, do not search for basic how-tos. Forbidden queries: How to install XYZ, How to set up ABC, etc.
- Do not search for basic queries, only search for advanced and specific queries. You are allowed to leave the "queries" field empty if no search queries are needed for the step.
- DO NOT EVER SEARCH FOR BASIC QUERIES. ONLY SEARCH FOR ADVANCED QUERIES.
- YOU ARE ALLOWED TO LEAVE THE "queries" FIELD EMPTY IF NO SEARCH QUERIES ARE NEEDED FOR THE STEP.
- You only have to return one JSON object with the queries and ask_user fields. You can't return multiple JSON objects.

Only the provided JSON response format is accepted. Any other response format will be rejected.
46
src/agents/researcher/researcher.py
Normal file
@@ -0,0 +1,46 @@
import json
from typing import List

from jinja2 import Environment, BaseLoader

from src.llm import LLM
from src.services.utils import retry_wrapper, validate_responses
from src.browser.search import BingSearch

PROMPT = open("src/agents/researcher/prompt.jinja2").read().strip()


class Researcher:
    def __init__(self, base_model: str):
        self.bing_search = BingSearch()
        self.llm = LLM(model_id=base_model)

    def render(self, step_by_step_plan: str, contextual_keywords: str) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            step_by_step_plan=step_by_step_plan,
            contextual_keywords=contextual_keywords
        )

    @validate_responses
    def validate_response(self, response: str) -> dict | bool:
        # Both fields are required; reject if either is missing (an `and` here
        # would let a partial response through and raise a KeyError below).
        if "queries" not in response or "ask_user" not in response:
            return False
        else:
            return {
                "queries": response["queries"],
                "ask_user": response["ask_user"]
            }

    @retry_wrapper
    def execute(self, step_by_step_plan: str, contextual_keywords: List[str], project_name: str) -> dict | bool:
        contextual_keywords_str = ", ".join(map(lambda k: k.capitalize(), contextual_keywords))
        prompt = self.render(step_by_step_plan, contextual_keywords_str)

        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        return valid_response
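A hypothetical wiring of the Researcher: the plan text, keywords, model id, and project name are placeholders, and a working LLM backend is assumed:

```python
# Hypothetical usage; none of these argument values are defined in this commit.
from src.agents.researcher import Researcher

researcher = Researcher(base_model="gpt-4")
result = researcher.execute(
    step_by_step_plan="- [ ] Step 1: Research the Claude API for Python.",
    contextual_keywords=["claude", "api", "python"],
    project_name="demo-project"
)

if result:
    print(result["queries"])   # e.g. ["Claude API Documentation Python"]
    print(result["ask_user"])  # empty string when no user input is needed
```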
1
src/agents/runner/__init__.py
Normal file
@@ -0,0 +1 @@
from .runner import Runner
37
src/agents/runner/prompt.jinja2
Normal file
@@ -0,0 +1,37 @@
You are Devika, an AI Software Engineer. You have been talking to the user and this is the exchange so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

Full Code:
~~~
{{ code_markdown }}
~~~

User's last message: {{ conversation[-1] }}

System Operating System: {{ system_os }}

Your task is to invoke the system to run this code.

Your response should be in the following format:
```
{
  "commands": [
    "pip3 install -r requirements.txt",
    "python3 main.py"
  ]
}
```

Rules:
- You wrote the code, never address the user directly. You should not say things like "The code you provided", instead use "The code I wrote".
- Read the full context, including the code (if any), carefully to construct the commands required to run the project.
- The commands should be compatible with the system operating system provided.
- You are inside the project directory, so just run the commands as if the project directory is your working directory.
- Do not "cd" into the project directory. The system is already in the project directory.

Any response other than the JSON format will be rejected by the system.
58
src/agents/runner/rerunner.jinja2
Normal file
@@ -0,0 +1,58 @@
You are Devika, an AI Software Engineer. You have been talking to the user and this is the exchange so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

Full Code:
~~~
{{ code_markdown }}
~~~

User's last message: {{ conversation[-1] }}

System Operating System: {{ system_os }}

You tried to execute the following commands to run this project:
```
{% for command in commands %}
$ {{ command }}
{% endfor %}
```

But it resulted in the following error:
```
$ {{ commands[-1] }}
{{ error }}
```

Now identify whether this error is caused by the code or the command. If it is caused by the command, provide the correct command to run the project. If it is caused by the code, respond with the patch action response.

Patch Action Response:
```
{
  "action": "patch",
  "response": "<A response like: I encountered an error while running the project. Seems to be <problem>. Let me try fixing it.>"
}
```

Command Fix Response:
```
{
  "action": "command",
  "command": "<Fixed command here>",
  "response": "<A response like: I encountered an error while running the project. Seems to be <problem>. Let me try fixing it.>"
}
```

Rules:
- You wrote the code, never address the user directly. You should not say things like "The code you provided", instead use "The code I wrote".
- Read the full context, including the code (if any), carefully to construct the commands required to fix the error while running the project.
- The command should be compatible with the system operating system provided.
- You are inside the project directory, so just run the commands as if the project directory is your working directory.
- Do not "cd" into the project directory. The system is already in the project directory.
- Correctly identify whether the error is caused by the code or the command. After identifying the cause, respond with either the "patch" or "command" action.

Any response other than the JSON format will be rejected by the system. ONLY RESPOND WITH THE JSON OBJECT.
222
src/agents/runner/runner.py
Normal file
@@ -0,0 +1,222 @@
import time
import json
import os
import subprocess

from jinja2 import Environment, BaseLoader

from src.agents.patcher import Patcher

from src.llm import LLM
from src.state import AgentState
from src.project import ProjectManager
from src.services.utils import retry_wrapper, validate_responses

PROMPT = open("src/agents/runner/prompt.jinja2", "r").read().strip()
RERUNNER_PROMPT = open("src/agents/runner/rerunner.jinja2", "r").read().strip()


class Runner:
    def __init__(self, base_model: str):
        self.base_model = base_model
        self.llm = LLM(model_id=base_model)

    def render(
        self,
        conversation: str,
        code_markdown: str,
        system_os: str
    ) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            conversation=conversation,
            code_markdown=code_markdown,
            system_os=system_os,
        )

    def render_rerunner(
        self,
        conversation: str,
        code_markdown: str,
        system_os: str,
        commands: list,
        error: str
    ):
        env = Environment(loader=BaseLoader())
        template = env.from_string(RERUNNER_PROMPT)
        return template.render(
            conversation=conversation,
            code_markdown=code_markdown,
            system_os=system_os,
            commands=commands,
            error=error
        )

    @validate_responses
    def validate_response(self, response: str):
        if "commands" not in response:
            return False
        else:
            return response["commands"]

    @validate_responses
    def validate_rerunner_response(self, response: str):
        # Both fields are required; reject if either is missing (an `and` here
        # would only reject when both were absent).
        if "action" not in response or "response" not in response:
            return False
        else:
            return response

    @retry_wrapper
    def run_code(
        self,
        commands: list,
        project_path: str,
        project_name: str,
        conversation: list,
        code_markdown: str,
        system_os: str
    ):
        retries = 0

        for command in commands:
            command_set = command.split(" ")
            command_failed = False

            process = subprocess.run(
                command_set,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=project_path
            )
            # Include stderr so the rerunner prompt actually sees the error text.
            command_output = process.stdout.decode('utf-8') + process.stderr.decode('utf-8')
            command_failed = process.returncode != 0

            new_state = AgentState().new_state()
            new_state["internal_monologue"] = "Running code..."
            new_state["terminal_session"]["title"] = "Terminal"
            new_state["terminal_session"]["command"] = command
            new_state["terminal_session"]["output"] = command_output
            AgentState().add_to_current_state(project_name, new_state)
            time.sleep(1)

            while command_failed and retries < 2:
                new_state = AgentState().new_state()
                new_state["internal_monologue"] = "Oh seems like there is some error... :("
                new_state["terminal_session"]["title"] = "Terminal"
                new_state["terminal_session"]["command"] = command
                new_state["terminal_session"]["output"] = command_output
                AgentState().add_to_current_state(project_name, new_state)
                time.sleep(1)

                prompt = self.render_rerunner(
                    conversation=conversation,
                    code_markdown=code_markdown,
                    system_os=system_os,
                    commands=commands,
                    error=command_output
                )

                response = self.llm.inference(prompt, project_name)

                valid_response = self.validate_rerunner_response(response)

                if not valid_response:
                    return False

                action = valid_response["action"]

                if action == "command":
                    command = valid_response["command"]
                    response = valid_response["response"]

                    ProjectManager().add_message_from_devika(project_name, response)

                    command_set = command.split(" ")
                    command_failed = False

                    process = subprocess.run(
                        command_set,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        cwd=project_path
                    )
                    command_output = process.stdout.decode('utf-8') + process.stderr.decode('utf-8')
                    command_failed = process.returncode != 0

                    new_state = AgentState().new_state()
                    new_state["internal_monologue"] = "Running code..."
                    new_state["terminal_session"]["title"] = "Terminal"
                    new_state["terminal_session"]["command"] = command
                    new_state["terminal_session"]["output"] = command_output
                    AgentState().add_to_current_state(project_name, new_state)
                    time.sleep(1)

                    if command_failed:
                        retries += 1
                    else:
                        break
                elif action == "patch":
                    response = valid_response["response"]

                    ProjectManager().add_message_from_devika(project_name, response)

                    code = Patcher(base_model=self.base_model).execute(
                        conversation=conversation,
                        code_markdown=code_markdown,
                        commands=commands,
                        error=command_output,
                        system_os=system_os,
                        project_name=project_name
                    )

                    Patcher(base_model=self.base_model).save_code_to_project(code, project_name)

                    command_set = command.split(" ")
                    command_failed = False

                    process = subprocess.run(
                        command_set,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        cwd=project_path
                    )
                    command_output = process.stdout.decode('utf-8') + process.stderr.decode('utf-8')
                    command_failed = process.returncode != 0

                    new_state = AgentState().new_state()
                    new_state["internal_monologue"] = "Running code..."
                    new_state["terminal_session"]["title"] = "Terminal"
                    new_state["terminal_session"]["command"] = command
                    new_state["terminal_session"]["output"] = command_output
                    AgentState().add_to_current_state(project_name, new_state)
                    time.sleep(1)

                    if command_failed:
                        retries += 1
                    else:
                        break

    @retry_wrapper
    def execute(
        self,
        conversation: list,
        code_markdown: str,
        os_system: str,
        project_path: str,
        project_name: str
    ) -> str:
        prompt = self.render(conversation, code_markdown, os_system)
        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        self.run_code(
            valid_response,
            project_path,
            project_name,
            conversation,
            code_markdown,
            os_system
        )

        return valid_response
62
src/apis/project.py
Normal file
@@ -0,0 +1,62 @@
from flask import blueprints, request, jsonify, send_file, make_response
from werkzeug.utils import secure_filename
from src.logger import Logger, route_logger
from src.config import Config
from src.project import ProjectManager
from ..state import AgentState

import os

project_bp = blueprints.Blueprint("project", __name__)

logger = Logger()
manager = ProjectManager()


# Project APIs

@project_bp.route("/api/get-project-files", methods=["GET"])
@route_logger(logger)
def project_files():
    project_name = secure_filename(request.args.get("project_name"))
    files = manager.get_project_files(project_name)
    return jsonify({"files": files})


@project_bp.route("/api/create-project", methods=["POST"])
@route_logger(logger)
def create_project():
    data = request.json
    project_name = data.get("project_name")
    manager.create_project(secure_filename(project_name))
    return jsonify({"message": "Project created"})


@project_bp.route("/api/delete-project", methods=["POST"])
@route_logger(logger)
def delete_project():
    data = request.json
    project_name = secure_filename(data.get("project_name"))
    manager.delete_project(project_name)
    AgentState().delete_state(project_name)
    return jsonify({"message": "Project deleted"})


@project_bp.route("/api/download-project", methods=["GET"])
@route_logger(logger)
def download_project():
    project_name = secure_filename(request.args.get("project_name"))
    manager.project_to_zip(project_name)
    project_path = manager.get_zip_path(project_name)
    return send_file(project_path, as_attachment=False)


@project_bp.route("/api/download-project-pdf", methods=["GET"])
@route_logger(logger)
def download_project_pdf():
    project_name = secure_filename(request.args.get("project_name"))
    pdf_dir = Config().get_pdfs_dir()
    pdf_path = os.path.join(pdf_dir, f"{project_name}.pdf")

    response = make_response(send_file(pdf_path))
    # Fixed a botched find-and-replace: this header was 'project_bplication/pdf'.
    response.headers['Content-Type'] = 'application/pdf'
    return response
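Hypothetical client calls against these endpoints. The host and port are assumptions (whatever address the Flask app is served on), the project name is a placeholder, and the `requests` dependency is not part of this commit:

```python
# Placeholder base URL; substitute the address the backend is actually served on.
import requests

BASE = "http://127.0.0.1:1337"

requests.post(f"{BASE}/api/create-project", json={"project_name": "demo-project"})
files = requests.get(f"{BASE}/api/get-project-files",
                     params={"project_name": "demo-project"}).json()["files"]
requests.post(f"{BASE}/api/delete-project", json={"project_name": "demo-project"})
```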
17
src/bert/sentence.py
Normal file
@@ -0,0 +1,17 @@
from keybert import KeyBERT


class SentenceBert:
    def __init__(self, sentence: str):
        self.sentence = sentence
        self.kw_model = KeyBERT()

    def extract_keywords(self, top_n: int = 5) -> list:
        keywords = self.kw_model.extract_keywords(
            self.sentence,
            keyphrase_ngram_range=(1, 1),
            stop_words='english',
            top_n=top_n,
            use_mmr=True,
            diversity=0.7
        )
        return keywords
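A minimal sketch of keyword extraction with the class above; the sentence is illustrative, and KeyBERT's `extract_keywords` returns (keyword, relevance score) pairs:

```python
# Illustrative input; a KeyBERT model is downloaded on first use.
from src.bert.sentence import SentenceBert

pairs = SentenceBert("Build a Flask API that scrapes GitHub").extract_keywords(top_n=3)
# pairs are (keyword, score) tuples, e.g. [("flask", 0.62), ...]
keywords = [word for word, score in pairs]
print(keywords)
```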
2
src/browser/__init__.py
Normal file
@@ -0,0 +1,2 @@
from .browser import Browser
from .interaction import start_interaction
89
src/browser/browser.py
Normal file
@@ -0,0 +1,89 @@
import asyncio
import base64
import os

# Only the async API is used; importing TimeoutError from both the sync and
# async modules would shadow one with the other.
from playwright.async_api import async_playwright, TimeoutError
from markdownify import markdownify as md
from pdfminer.high_level import extract_text
from src.socket_instance import emit_agent
from src.config import Config
from src.state import AgentState


class Browser:
    def __init__(self):
        self.playwright = None
        self.browser = None
        self.page = None
        self.agent = AgentState()

    async def start(self):
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch(headless=True)
        self.page = await self.browser.new_page()
        return self

    # def new_page(self):
    #     return self.browser.new_page()

    async def go_to(self, url):
        try:
            await self.page.goto(url, timeout=20000)
        except TimeoutError as e:
            print(f"TimeoutError: {e} when trying to navigate to {url}")
            return False
        return True

    async def screenshot(self, project_name):
        screenshots_save_path = Config().get_screenshots_dir()

        page_metadata = await self.page.evaluate("() => { return { url: document.location.href, title: document.title } }")
        page_url = page_metadata['url']
        random_filename = os.urandom(20).hex()
        filename_to_save = f"{random_filename}.png"
        path_to_save = os.path.join(screenshots_save_path, filename_to_save)

        await self.page.emulate_media(media="screen")
        await self.page.screenshot(path=path_to_save, full_page=True)
        screenshot = await self.page.screenshot()
        screenshot_bytes = base64.b64encode(screenshot).decode()
        new_state = self.agent.new_state()
        new_state["internal_monologue"] = "Browsing the web right now..."
        new_state["browser_session"]["url"] = page_url
        new_state["browser_session"]["screenshot"] = path_to_save
        self.agent.add_to_current_state(project_name, new_state)
        # self.close()
        return path_to_save, screenshot_bytes

    # The async page API has to be awaited; these were previously written as
    # sync methods and would have returned coroutines instead of content.
    async def get_html(self):
        return await self.page.content()

    async def get_markdown(self):
        return md(await self.page.content())

    async def get_pdf(self):
        pdfs_save_path = Config().get_pdfs_dir()

        page_metadata = await self.page.evaluate("() => { return { url: document.location.href, title: document.title } }")
        filename_to_save = f"{page_metadata['title']}.pdf"
        save_path = os.path.join(pdfs_save_path, filename_to_save)

        await self.page.pdf(path=save_path)

        return save_path

    def pdf_to_text(self, pdf_path):
        return extract_text(pdf_path).strip()

    async def get_content(self):
        pdf_path = await self.get_pdf()
        return self.pdf_to_text(pdf_path)

    async def extract_text(self):
        return await self.page.evaluate("() => document.body.innerText")

    async def close(self):
        await self.page.close()
        await self.browser.close()
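A minimal async usage sketch of the Browser class; the URL and project name are placeholders, and a configured screenshots directory is assumed:

```python
# Placeholder URL and project name; illustrative only.
import asyncio
from src.browser import Browser

async def main():
    browser = await Browser().start()
    if await browser.go_to("https://example.com"):
        path, _ = await browser.screenshot("demo-project")
        text = await browser.extract_text()
        print(path, text[:80])
    await browser.close()

asyncio.run(main())
```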
547
src/browser/interaction.py
Normal file
547
src/browser/interaction.py
Normal file
|
@ -0,0 +1,547 @@
#!/usr/bin/env python3
#
# natbot.py
# https://github.com/nat/natbot
#
# MODIFIED FOR DEVIKA

from playwright.sync_api import sync_playwright
import os
import time
from sys import exit, platform

from src.config import Config
from src.state import AgentState
from src.llm import LLM

prompt_template = """
You are an agent controlling a browser. You are given:

(1) an objective that you are trying to achieve
(2) the URL of your current web page
(3) a simplified text description of what's visible in the browser window (more on that below)

You can issue these commands:
SCROLL UP - scroll up one page
SCROLL DOWN - scroll down one page
CLICK X - click on a given element. You can only click on links, buttons, and inputs!
TYPE X "TEXT" - type the specified text into the input with id X
TYPESUBMIT X "TEXT" - same as TYPE above, except then it presses ENTER to submit the form

The format of the browser content is highly simplified; all formatting elements are stripped.
Interactive elements such as links, inputs, buttons are represented like this:

<link id=1>text</link>
<button id=2>text</button>
<input id=3>text</input>

Images are rendered as their alt text like this:

<img id=4 alt=""/>

Based on your given objective, issue whatever command you believe will get you closest to achieving your goal.
You always start on Google; you should submit a search query to Google that will take you to the best page for
achieving your objective. And then interact with that page to achieve your objective.

If you find yourself on Google and there are no search results displayed yet, you should probably issue a command
like "TYPESUBMIT 7 "search query"" to get to a more useful page.

Then, if you find yourself on a Google search results page, you might issue the command "CLICK 24" to click
on the first link in the search results. (If your previous command was a TYPESUBMIT your next command should
probably be a CLICK.)

Don't try to interact with elements that you can't see.

Here are some examples:

EXAMPLE 1:
==================================================
CURRENT BROWSER CONTENT:
------------------
<link id=1>About</link>
<link id=2>Store</link>
<link id=3>Gmail</link>
<link id=4>Images</link>
<link id=5>(Google apps)</link>
<link id=6>Sign in</link>
<img id=7 alt="(Google)"/>
<input id=8 alt="Search"></input>
<button id=9>(Search by voice)</button>
<button id=10>(Google Search)</button>
<button id=11>(I'm Feeling Lucky)</button>
<link id=12>Advertising</link>
<link id=13>Business</link>
<link id=14>How Search works</link>
<link id=15>Carbon neutral since 2007</link>
<link id=16>Privacy</link>
<link id=17>Terms</link>
<text id=18>Settings</text>
------------------
OBJECTIVE: Find a 2 bedroom house for sale in Anchorage AK for under $750k
CURRENT URL: https://www.google.com/
YOUR COMMAND:
TYPESUBMIT 8 "anchorage redfin"
==================================================

EXAMPLE 2:
==================================================
CURRENT BROWSER CONTENT:
------------------
<link id=1>About</link>
<link id=2>Store</link>
<link id=3>Gmail</link>
<link id=4>Images</link>
<link id=5>(Google apps)</link>
<link id=6>Sign in</link>
<img id=7 alt="(Google)"/>
<input id=8 alt="Search"></input>
<button id=9>(Search by voice)</button>
<button id=10>(Google Search)</button>
<button id=11>(I'm Feeling Lucky)</button>
<link id=12>Advertising</link>
<link id=13>Business</link>
<link id=14>How Search works</link>
<link id=15>Carbon neutral since 2007</link>
<link id=16>Privacy</link>
<link id=17>Terms</link>
<text id=18>Settings</text>
------------------
OBJECTIVE: Make a reservation for 4 at Dorsia at 8pm
CURRENT URL: https://www.google.com/
YOUR COMMAND:
TYPESUBMIT 8 "dorsia nyc opentable"
==================================================

EXAMPLE 3:
==================================================
CURRENT BROWSER CONTENT:
------------------
<button id=1>For Businesses</button>
<button id=2>Mobile</button>
<button id=3>Help</button>
<button id=4 alt="Language Picker">EN</button>
<link id=5>OpenTable logo</link>
<button id=6 alt ="search">Search</button>
<text id=7>Find your table for any occasion</text>
<button id=8>(Date selector)</button>
<text id=9>Sep 28, 2022</text>
<text id=10>7:00 PM</text>
<text id=11>2 people</text>
<input id=12 alt="Location, Restaurant, or Cuisine"></input>
<button id=13>Let's go</button>
<text id=14>It looks like you're in Peninsula. Not correct?</text>
<button id=15>Get current location</button>
<button id=16>Next</button>
------------------
OBJECTIVE: Make a reservation for 4 for dinner at Dorsia in New York City at 8pm
CURRENT URL: https://www.opentable.com/
YOUR COMMAND:
TYPESUBMIT 12 "dorsia new york city"
==================================================

The current browser content, objective, and current URL follow. Reply with your next command to the browser.

CURRENT BROWSER CONTENT:
------------------
$browser_content
------------------

OBJECTIVE: $objective
CURRENT URL: $url
PREVIOUS COMMAND: $previous_command
YOUR COMMAND:
"""

black_listed_elements = set(["html", "head", "title", "meta", "iframe", "body", "script", "style", "path", "svg", "br", "::marker",])


class Crawler:
    def __init__(self):
        self.browser = (
            sync_playwright()
            .start()
            .chromium.launch(
                headless=True,
            )
        )

        self.page = self.browser.new_page()
        self.page.set_viewport_size({"width": 1280, "height": 1080})

    def screenshot(self, project_name):
        screenshots_save_path = Config().get_screenshots_dir()

        page_metadata = self.page.evaluate("() => { return { url: document.location.href, title: document.title } }")
        page_url = page_metadata['url']
        random_filename = os.urandom(20).hex()
        filename_to_save = f"{random_filename}.png"
        path_to_save = os.path.join(screenshots_save_path, filename_to_save)

        self.page.emulate_media(media="screen")
        self.page.screenshot(path=path_to_save)

        new_state = AgentState().new_state()
        new_state["internal_monologue"] = "Browsing the web right now..."
        new_state["browser_session"]["url"] = page_url
        new_state["browser_session"]["screenshot"] = path_to_save
        AgentState().add_to_current_state(project_name, new_state)

        return path_to_save

    def go_to_page(self, url):
        self.page.goto(url=url if "://" in url else "http://" + url)
        self.client = self.page.context.new_cdp_session(self.page)
        self.page_element_buffer = {}

    def scroll(self, direction):
        if direction == "up":
            self.page.evaluate(
                "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;"
            )
        elif direction == "down":
            self.page.evaluate(
                "(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;"
            )

    def click(self, id):
        # Inject javascript into the page which removes the target= attribute from all links
        js = """
        links = document.getElementsByTagName("a");
        for (var i = 0; i < links.length; i++) {
            links[i].removeAttribute("target");
        }
        """
        self.page.evaluate(js)

        element = self.page_element_buffer.get(int(id))
        if element:
            x = element.get("center_x")
            y = element.get("center_y")

            self.page.mouse.click(x, y)
        else:
            print("Could not find element")

    def type(self, id, text):
        self.click(id)
        self.page.keyboard.type(text)

    def enter(self):
        self.page.keyboard.press("Enter")

    def crawl(self):
        page = self.page
        page_element_buffer = self.page_element_buffer
        start = time.time()

        page_state_as_text = []

        device_pixel_ratio = page.evaluate("window.devicePixelRatio")
        if platform == "darwin" and device_pixel_ratio == 1:  # lies
            device_pixel_ratio = 2

        win_scroll_x = page.evaluate("window.scrollX")
        win_scroll_y = page.evaluate("window.scrollY")
        win_upper_bound = page.evaluate("window.pageYOffset")
        win_left_bound = page.evaluate("window.pageXOffset")
        win_width = page.evaluate("window.screen.width")
        win_height = page.evaluate("window.screen.height")
        win_right_bound = win_left_bound + win_width
        win_lower_bound = win_upper_bound + win_height
        document_offset_height = page.evaluate("document.body.offsetHeight")
        document_scroll_height = page.evaluate("document.body.scrollHeight")

        # Removed unused percentage_progress variables

        tree = self.client.send(
            "DOMSnapshot.captureSnapshot",
            {"computedStyles": [], "includeDOMRects": True, "includePaintOrder": True},
        )
        strings = tree["strings"]
        document = tree["documents"][0]
        nodes = document["nodes"]
        backend_node_id = nodes["backendNodeId"]
        attributes = nodes["attributes"]
        node_value = nodes["nodeValue"]
        parent = nodes["parentIndex"]
        node_types = nodes["nodeType"]
        node_names = nodes["nodeName"]
        is_clickable = set(nodes["isClickable"]["index"])

        text_value = nodes["textValue"]
        text_value_index = text_value["index"]
        text_value_values = text_value["value"]

        input_value = nodes["inputValue"]
        input_value_index = input_value["index"]
        input_value_values = input_value["value"]

        input_checked = nodes["inputChecked"]
        layout = document["layout"]
        layout_node_index = layout["nodeIndex"]
        bounds = layout["bounds"]

        cursor = 0
        html_elements_text = []

        child_nodes = {}
        elements_in_view_port = []

        # Refactored to use dict.setdefault() for cleaner logic
        ancestor_exceptions = {
            "a": {"ancestry": {"-1": (False, None)}, "nodes": {}},
            "button": {"ancestry": {"-1": (False, None)}, "nodes": {}},
        }

        def convert_name(node_name, is_clickable):
            if node_name == "a":
                return "link"
            if node_name == "input":
                return "input"
            if node_name == "img":
                return "img"
            if node_name == "button" or is_clickable:
                return "button"
            return "text"

        def find_attributes(attributes, keys):
            values = {}
            for [key_index, value_index] in zip(*(iter(attributes),) * 2):
                if value_index < 0:
                    continue
                key = strings[key_index]
                value = strings[value_index]
                if key in keys:
                    values[key] = value
                    keys.remove(key)
                    if not keys:
                        return values
            return values

        def add_to_hash_tree(hash_tree, tag, node_id, node_name, parent_id):
            parent_id_str = str(parent_id)
            if parent_id_str not in hash_tree:
                parent_name = strings[node_names[parent_id]].lower()
                grand_parent_id = parent[parent_id]
                add_to_hash_tree(hash_tree, tag, parent_id, parent_name, grand_parent_id)
            is_parent_desc_anchor, anchor_id = hash_tree[parent_id_str]
            value = (True, node_id) if node_name == tag else (True, anchor_id) if is_parent_desc_anchor else (False, None)
            hash_tree[str(node_id)] = value
            return value

        for index, node_name_index in enumerate(node_names):
            node_parent = parent[index]
            node_name = strings[node_name_index].lower()

            # Refactored to use dict to store exceptions
            for tag in ancestor_exceptions:
                is_ancestor_of_tag, tag_id = add_to_hash_tree(ancestor_exceptions[tag]["ancestry"], tag, index, node_name, node_parent)
                ancestor_exceptions[tag]["nodes"][str(index)] = (is_ancestor_of_tag, tag_id)

            try:
                cursor = layout_node_index.index(index)
            except ValueError:
                # node has no layout entry, so it is not rendered
                continue

            if node_name in black_listed_elements:
                continue

            [x, y, width, height] = bounds[cursor]
            x /= device_pixel_ratio
            y /= device_pixel_ratio
            width /= device_pixel_ratio
            height /= device_pixel_ratio

            elem_left_bound = x
            elem_top_bound = y
            elem_right_bound = x + width
            elem_lower_bound = y + height

            partially_is_in_viewport = (
                elem_left_bound < win_right_bound
                and elem_right_bound >= win_left_bound
                and elem_top_bound < win_lower_bound
                and elem_lower_bound >= win_upper_bound
            )

            if not partially_is_in_viewport:
                continue

            meta_data = []

            # Refactored to use dict to store and access attributes
            element_attributes = find_attributes(
                attributes[index], ["type", "placeholder", "aria-label", "title", "alt"]
            )

            ancestor_exception = {
                tag: ancestor_exceptions[tag]["nodes"].get(str(index), (False, None))
                for tag in ancestor_exceptions
            }

            is_ancestor_of_anchor, anchor_id = ancestor_exception.get("a", (False, None))
            is_ancestor_of_button, button_id = ancestor_exception.get("button", (False, None))
            ancestor_node_key = (
                str(anchor_id) if is_ancestor_of_anchor else str(button_id) if is_ancestor_of_button else None
            )
            ancestor_node = (
                child_nodes.setdefault(str(ancestor_node_key), [])
                if is_ancestor_of_anchor or is_ancestor_of_button
                else None
            )

            if node_name == "#text" and ancestor_node is not None:
                text = strings[node_value[index]]
                if text in ["•", "|"]:
                    continue
                ancestor_node.append({"type": "text", "value": text})
            else:
                if (node_name == "input" and element_attributes.get("type") == "submit") or node_name == "button":
                    node_name = "button"
                    element_attributes.pop("type", None)

                for key, value in element_attributes.items():
                    if ancestor_node is not None:
                        ancestor_node.append({"type": "attribute", "key": key, "value": value})
                    else:
                        meta_data.append(value)

            element_node_value = None
            if node_value[index] >= 0:
                element_node_value = strings[node_value[index]]
                if element_node_value == "|":
                    continue
            elif node_name == "input" and index in input_value_index:
                input_text_index = input_value_index.index(index)
                text_index = input_value_values[input_text_index]
                if text_index >= 0:
                    element_node_value = strings[text_index]

            if (is_ancestor_of_anchor or is_ancestor_of_button) and (node_name != "a" and node_name != "button"):
                continue

            elements_in_view_port.append({
                "node_index": str(index),
                "backend_node_id": backend_node_id[index],
                "node_name": node_name,
                "node_value": element_node_value,
                "node_meta": meta_data,
                "is_clickable": index in is_clickable,
                "origin_x": int(x),
                "origin_y": int(y),
                "center_x": int(x + (width / 2)),
                "center_y": int(y + (height / 2)),
            })

        elements_of_interest = []
        id_counter = 0

        for element in elements_in_view_port:
            node_index = element["node_index"]
            node_name = element["node_name"]
            node_value = element["node_value"]
            is_clickable = element["is_clickable"]
            meta_data = element["node_meta"]

            inner_text = f"{node_value} " if node_value else ""
            meta = ""

            if node_index in child_nodes:
                for child in child_nodes[node_index]:
                    entry_type = child["type"]
                    entry_value = child["value"]
                    if entry_type == "attribute":
                        entry_key = child["key"]
                        meta_data.append(f'{entry_key}="{entry_value}"')
                    else:
                        inner_text += f"{entry_value} "

            if meta_data:
                meta = f' {" ".join(meta_data)}'
            inner_text = inner_text.strip()

            # Refactored to use descriptive variable names
            should_include_element = (
                inner_text != "" or
                node_name in ["link", "input", "img", "button", "textarea"] or
                (node_name == "button" and meta != "")
            )
            if not should_include_element:
                continue

            page_element_buffer[id_counter] = element

            element_string = f'<{convert_name(node_name, is_clickable)} id={id_counter}{meta}>'
            if inner_text:
                element_string += f'{inner_text}</{convert_name(node_name, is_clickable)}>'
            else:
                element_string += '/>'
            elements_of_interest.append(element_string)

            id_counter += 1

        print(f'Parsing time: {time.time() - start:.2f} seconds')
        return elements_of_interest


def start_interaction(model_id, objective, project_name):
    _crawler = Crawler()

    # retained from natbot's interactive mode; not called by the automated loop below
    def print_help():
        print(
            "(g) to visit url\n(u) scroll up\n(d) scroll down\n(c) to click\n(t) to type\n" +
            "(h) to view commands again\n(r/enter) to run suggested command\n(o) change objective"
        )

    def get_gpt_command(objective, url, previous_command, browser_content):
        prompt = prompt_template
        prompt = prompt.replace("$objective", objective)
        prompt = prompt.replace("$url", url[:100])
        prompt = prompt.replace("$previous_command", previous_command)
        prompt = prompt.replace("$browser_content", browser_content[:4500])
        # LLM.inference takes the project name as well, for token accounting
        response = LLM(model_id=model_id).inference(prompt, project_name)
        return response

    def run_cmd(cmd):
        cmd = cmd.split("\n")[0]

        if cmd.startswith("SCROLL UP"):
            _crawler.scroll("up")
        elif cmd.startswith("SCROLL DOWN"):
            _crawler.scroll("down")
        elif cmd.startswith("CLICK"):
            commasplit = cmd.split(",")
            id = commasplit[0].split(" ")[1]
            _crawler.click(id)
        elif cmd.startswith("TYPE"):
            spacesplit = cmd.split(" ")
            id = spacesplit[1]
            text = " ".join(spacesplit[2:])
            # strip the surrounding quotation marks
            text = text[1:-1]
            if cmd.startswith("TYPESUBMIT"):
                text += '\n'
            _crawler.type(id, text)

        time.sleep(2)

    gpt_cmd = ""
    prev_cmd = ""
    _crawler.go_to_page("google.com")

    try:
        visits = 0

        while visits < 5:
            browser_content = "\n".join(_crawler.crawl())
            prev_cmd = gpt_cmd

            current_url = _crawler.page.url

            _crawler.screenshot(project_name)

            gpt_cmd = get_gpt_command(objective, current_url, prev_cmd, browser_content).strip()
            run_cmd(gpt_cmd)

            visits += 1

    except KeyboardInterrupt:
        print("\n[!] Ctrl+C detected, exiting gracefully.")
        exit(0)
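
Example usage (not part of this commit; a minimal sketch assuming a configured LLM backend and the AgentState machinery referenced above):

# Hypothetical driver script, not part of this commit.
from src.browser.interaction import start_interaction

# Runs up to five crawl -> LLM-command rounds starting from google.com,
# saving a screenshot of each visited page under the project's screenshot dir.
start_interaction(
    model_id="Claude 3 Opus",  # a display name understood by LLM.model_enum
    objective="Find the Playwright Python installation command",
    project_name="demo",
)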
167
src/browser/search.py
Normal file
@@ -0,0 +1,167 @@
import requests
from src.config import Config

import re
from urllib.parse import unquote
from html import unescape
import orjson


class BingSearch:
    def __init__(self):
        self.config = Config()
        self.bing_api_key = self.config.get_bing_api_key()
        self.bing_api_endpoint = self.config.get_bing_api_endpoint()
        self.query_result = None

    def search(self, query):
        headers = {"Ocp-Apim-Subscription-Key": self.bing_api_key}
        params = {"q": query, "mkt": "en-US"}

        try:
            response = requests.get(self.bing_api_endpoint, headers=headers, params=params)
            response.raise_for_status()
            self.query_result = response.json()
            return self.query_result
        except Exception as error:
            return error

    def get_first_link(self):
        return self.query_result["webPages"]["value"][0]["url"]


class GoogleSearch:
    def __init__(self):
        self.config = Config()
        self.google_search_api_key = self.config.get_google_search_api_key()
        self.google_search_engine_ID = self.config.get_google_search_engine_id()
        self.google_search_api_endpoint = self.config.get_google_search_api_endpoint()
        self.query_result = None

    def search(self, query):
        params = {
            "key": self.google_search_api_key,
            "cx": self.google_search_engine_ID,
            "q": query
        }
        try:
            print("Searching in Google...")
            response = requests.get(self.google_search_api_endpoint, params=params)
            # response.raise_for_status()
            self.query_result = response.json()
        except Exception as error:
            return error

    def get_first_link(self):
        item = ""
        try:
            if 'items' in self.query_result:
                item = self.query_result['items'][0]['link']
            return item
        except Exception as error:
            print(error)
            return ""


# class DuckDuckGoSearch:
#     def __init__(self):
#         self.query_result = None
#
#     def search(self, query):
#         from duckduckgo_search import DDGS
#         try:
#             self.query_result = DDGS().text(query, max_results=5, region="us")
#             print(self.query_result)
#
#         except Exception as err:
#             print(err)
#
#     def get_first_link(self):
#         if self.query_result:
#             return self.query_result[0]["href"]
#         else:
#             return None
#


class DuckDuckGoSearch:
    """DuckDuckGo search engine class.
    methods are inherited from the duckduckgo_search package.
    do not change the methods.

    currently, the package is not working with our current setup.
    """
    def __init__(self):
        from curl_cffi import requests as curl_requests
        self.query_result = None
        self.asession = curl_requests.Session(impersonate="chrome", allow_redirects=False)
        self.asession.headers["Referer"] = "https://duckduckgo.com/"

    def _get_url(self, method, url, data=None, params=None):
        try:
            resp = self.asession.request(method, url, data=data, params=params)
            if resp.status_code == 200:
                return resp.content
            # note: the original compared status_code with `== (202, 301, 403)`,
            # which was always False; `in` is what was intended
            if resp.status_code in (202, 301, 403):
                raise Exception(f"Error: {resp.status_code} rate limit error")
            if not resp:
                return None
        except Exception as error:
            if "timeout" in str(error).lower():
                raise TimeoutError("Duckduckgo timed out error")

    def duck(self, query):
        resp = self._get_url("POST", "https://duckduckgo.com/", data={"q": query})
        vqd = self.extract_vqd(resp)

        params = {"q": query, "kl": 'en-us', "p": "1", "s": "0", "df": "", "vqd": vqd, "ex": ""}
        # the search parameters belong in the query string, not the request body
        resp = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=params)
        page_data = self.text_extract_json(resp)

        results = []
        for row in page_data:
            href = row.get("u")
            if href and href != f"http://www.google.com/search?q={query}":
                body = self.normalize(row["a"])
                if body:
                    result = {
                        "title": self.normalize(row["t"]),
                        "href": self.normalize_url(href),
                        "body": self.normalize(row["a"]),
                    }
                    results.append(result)

        self.query_result = results

    def search(self, query):
        self.duck(query)

    def get_first_link(self):
        return self.query_result[0]["href"]

    @staticmethod
    def extract_vqd(html_bytes: bytes) -> str:
        patterns = [(b'vqd="', 5, b'"'), (b"vqd=", 4, b"&"), (b"vqd='", 5, b"'")]
        for start_pattern, offset, end_pattern in patterns:
            try:
                start = html_bytes.index(start_pattern) + offset
                end = html_bytes.index(end_pattern, start)
                return html_bytes[start:end].decode()
            except ValueError:
                continue

    @staticmethod
    def text_extract_json(html_bytes):
        try:
            start = html_bytes.index(b"DDG.pageLayout.load('d',") + 24
            end = html_bytes.index(b");DDG.duckbar.load(", start)
            return orjson.loads(html_bytes[start:end])
        except Exception as ex:
            print(f"Error extracting JSON: {type(ex).__name__}: {ex}")

    @staticmethod
    def normalize_url(url: str) -> str:
        return unquote(url.replace(" ", "+")) if url else ""

    @staticmethod
    def normalize(raw_html: str) -> str:
        return unescape(re.sub("<.*?>", "", raw_html)) if raw_html else ""
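
Example usage of the shared search interface (not part of this commit; a minimal sketch assuming valid Bing and Google credentials in config.toml):

# Hypothetical usage example, not part of this commit.
from src.browser.search import BingSearch, GoogleSearch

bing = BingSearch()
bing.search("playwright python docs")   # stores the raw JSON in query_result
print(bing.get_first_link())

google = GoogleSearch()
google.search("playwright python docs")
print(google.get_first_link())          # returns "" when no items are present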
188
src/config.py
Normal file
@@ -0,0 +1,188 @@
import toml
import os


class Config:
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._load_config()
        return cls._instance

    def _load_config(self):
        # If the config file doesn't exist, copy from the sample
        if not os.path.exists("config.toml"):
            with open("sample.config.toml", "r") as f_in, open("config.toml", "w+") as f_out:
                f_out.write(f_in.read())
                f_out.seek(0)
                self.config = toml.load(f_out)
        else:
            # check if all the keys are present in the config file
            with open("sample.config.toml", "r") as f:
                sample_config = toml.load(f)

            with open("config.toml", "r+") as f:
                config = toml.load(f)

                # Update the config with any missing keys and their keys of keys
                for key, value in sample_config.items():
                    config.setdefault(key, value)
                    if isinstance(value, dict):
                        for sub_key, sub_value in value.items():
                            config[key].setdefault(sub_key, sub_value)

                f.seek(0)
                toml.dump(config, f)
                f.truncate()

            self.config = config

    def get_config(self):
        return self.config

    def get_bing_api_endpoint(self):
        return self.config["API_ENDPOINTS"]["BING"]

    def get_bing_api_key(self):
        return self.config["API_KEYS"]["BING"]

    def get_google_search_api_key(self):
        return self.config["API_KEYS"]["GOOGLE_SEARCH"]

    def get_google_search_engine_id(self):
        return self.config["API_KEYS"]["GOOGLE_SEARCH_ENGINE_ID"]

    def get_google_search_api_endpoint(self):
        return self.config["API_ENDPOINTS"]["GOOGLE"]

    def get_ollama_api_endpoint(self):
        return self.config["API_ENDPOINTS"]["OLLAMA"]

    def get_claude_api_key(self):
        return self.config["API_KEYS"]["CLAUDE"]

    def get_openai_api_key(self):
        return self.config["API_KEYS"]["OPENAI"]

    def get_openai_api_base_url(self):
        return self.config["API_ENDPOINTS"]["OPENAI"]

    def get_gemini_api_key(self):
        return self.config["API_KEYS"]["GEMINI"]

    def get_mistral_api_key(self):
        return self.config["API_KEYS"]["MISTRAL"]

    def get_groq_api_key(self):
        return self.config["API_KEYS"]["GROQ"]

    def get_netlify_api_key(self):
        return self.config["API_KEYS"]["NETLIFY"]

    def get_sqlite_db(self):
        return self.config["STORAGE"]["SQLITE_DB"]

    def get_screenshots_dir(self):
        return self.config["STORAGE"]["SCREENSHOTS_DIR"]

    def get_pdfs_dir(self):
        return self.config["STORAGE"]["PDFS_DIR"]

    def get_projects_dir(self):
        return self.config["STORAGE"]["PROJECTS_DIR"]

    def get_logs_dir(self):
        return self.config["STORAGE"]["LOGS_DIR"]

    def get_repos_dir(self):
        return self.config["STORAGE"]["REPOS_DIR"]

    def get_logging_rest_api(self):
        return self.config["LOGGING"]["LOG_REST_API"] == "true"

    def get_logging_prompts(self):
        return self.config["LOGGING"]["LOG_PROMPTS"] == "true"

    def get_timeout_inference(self):
        return self.config["TIMEOUT"]["INFERENCE"]

    def set_bing_api_key(self, key):
        self.config["API_KEYS"]["BING"] = key
        self.save_config()

    def set_bing_api_endpoint(self, endpoint):
        self.config["API_ENDPOINTS"]["BING"] = endpoint
        self.save_config()

    def set_google_search_api_key(self, key):
        self.config["API_KEYS"]["GOOGLE_SEARCH"] = key
        self.save_config()

    def set_google_search_engine_id(self, key):
        self.config["API_KEYS"]["GOOGLE_SEARCH_ENGINE_ID"] = key
        self.save_config()

    def set_google_search_api_endpoint(self, endpoint):
        self.config["API_ENDPOINTS"]["GOOGLE_SEARCH"] = endpoint
        self.save_config()

    def set_ollama_api_endpoint(self, endpoint):
        self.config["API_ENDPOINTS"]["OLLAMA"] = endpoint
        self.save_config()

    def set_claude_api_key(self, key):
        self.config["API_KEYS"]["CLAUDE"] = key
        self.save_config()

    def set_openai_api_key(self, key):
        self.config["API_KEYS"]["OPENAI"] = key
        self.save_config()

    def set_openai_api_endpoint(self, endpoint):
        self.config["API_ENDPOINTS"]["OPENAI"] = endpoint
        self.save_config()

    def set_gemini_api_key(self, key):
        self.config["API_KEYS"]["GEMINI"] = key
        self.save_config()

    def set_mistral_api_key(self, key):
        self.config["API_KEYS"]["MISTRAL"] = key
        self.save_config()

    def set_groq_api_key(self, key):
        self.config["API_KEYS"]["GROQ"] = key
        self.save_config()

    def set_netlify_api_key(self, key):
        self.config["API_KEYS"]["NETLIFY"] = key
        self.save_config()

    def set_logging_rest_api(self, value):
        self.config["LOGGING"]["LOG_REST_API"] = "true" if value else "false"
        self.save_config()

    def set_logging_prompts(self, value):
        self.config["LOGGING"]["LOG_PROMPTS"] = "true" if value else "false"
        self.save_config()

    def set_timeout_inference(self, value):
        self.config["TIMEOUT"]["INFERENCE"] = value
        self.save_config()

    def save_config(self):
        with open("config.toml", "w") as f:
            toml.dump(self.config, f)

    def update_config(self, data):
        for key, value in data.items():
            if key in self.config:
                with open("config.toml", "r+") as f:
                    config = toml.load(f)
                    for sub_key, sub_value in value.items():
                        self.config[key][sub_key] = sub_value
                        config[key][sub_key] = sub_value
                    f.seek(0)
                    toml.dump(config, f)
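
Example usage of the Config singleton (not part of this commit; a minimal sketch assuming a sample.config.toml with the keys above sits in the working directory):

# Hypothetical usage example, not part of this commit.
from src.config import Config

config = Config()                 # first call loads (or creates) config.toml
print(config.get_logs_dir())
config.set_logging_prompts(True)  # persisted back to config.toml as "true"
assert Config() is config         # __new__ always hands back the same instance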
0
src/documenter/graphwiz.py
Normal file
23
src/documenter/pdf.py
Normal file
@@ -0,0 +1,23 @@
import os
from io import BytesIO
from markdown import markdown
from xhtml2pdf import pisa

from src.config import Config

class PDF:
    def __init__(self):
        config = Config()
        self.pdf_path = config.get_pdfs_dir()

    def markdown_to_pdf(self, markdown_string, project_name):
        html_string = markdown(markdown_string)

        out_file_path = os.path.join(self.pdf_path, f"{project_name}.pdf")
        with open(out_file_path, "wb") as out_file:
            pisa_status = pisa.CreatePDF(html_string, dest=out_file)

        if pisa_status.err:
            raise Exception("Error generating PDF")

        return out_file_path
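
Example usage (not part of this commit; a minimal sketch assuming the PDFS_DIR from config.toml exists):

# Hypothetical usage example, not part of this commit.
from src.documenter.pdf import PDF

out_path = PDF().markdown_to_pdf("# Report\n\nHello **world**.", project_name="demo")
print(out_path)  # <PDFS_DIR>/demo.pdf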
0
src/documenter/uml.py
Normal file
0
src/experts/__UNIMPLEMENTED__
Normal file
11
src/experts/chemistry.py
Normal file
@@ -0,0 +1,11 @@
"""
Function calls and Parser for:
- SMILES Notation
- Molecule Parser

Visualization for:
- Molecule Structure
- Molecule Properties

Use RDKit bindings
"""
3
src/experts/game-dev.py
Normal file
@@ -0,0 +1,3 @@
"""
RAG for Unity/Godot/Unreal Engine code blocks
"""
4
src/experts/math.py
Normal file
@@ -0,0 +1,4 @@
"""
Evaluator Function Calling
Wolfram Alpha Plugin
"""
3
src/experts/medical.py
Normal file
@@ -0,0 +1,3 @@
"""
PubMed archive RAG
"""
3
src/experts/physics.py
Normal file
@@ -0,0 +1,3 @@
"""
Physics Function Calls
"""
3
src/experts/stackoverflow.py
Normal file
@@ -0,0 +1,3 @@
"""
Stack Overflow query searcher and retrieval
"""
3
src/experts/web-design.py
Normal file
@@ -0,0 +1,3 @@
"""
Tailwind UI Components code snippets RAG
"""
1
src/filesystem/__init__.py
Normal file
@@ -0,0 +1 @@
from .read_code import ReadCode
35
src/filesystem/read_code.py
Normal file
@@ -0,0 +1,35 @@
import os

from src.config import Config

"""
TODO: Replace this with `code2prompt` - https://github.com/mufeedvh/code2prompt
"""

class ReadCode:
    def __init__(self, project_name: str):
        config = Config()
        project_path = config.get_projects_dir()
        self.directory_path = os.path.join(project_path, project_name.lower().replace(" ", "-"))

    def read_directory(self):
        files_list = []
        for root, _dirs, files in os.walk(self.directory_path):
            for file in files:
                try:
                    file_path = os.path.join(root, file)
                    with open(file_path, 'r') as file_content:
                        files_list.append({"filename": file_path, "code": file_content.read()})
                except (OSError, UnicodeDecodeError):
                    # skip unreadable or binary files
                    pass

        return files_list

    def code_set_to_markdown(self):
        code_set = self.read_directory()
        markdown = ""
        for code in code_set:
            markdown += f"### {code['filename']}:\n\n"
            markdown += f"```\n{code['code']}\n```\n\n"
            markdown += "---\n\n"
        return markdown
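
Example usage (not part of this commit; a minimal sketch assuming a project folder named demo-app exists under PROJECTS_DIR):

# Hypothetical usage example, not part of this commit.
from src.filesystem import ReadCode

reader = ReadCode("Demo App")           # resolves to <PROJECTS_DIR>/demo-app
print(reader.code_set_to_markdown())    # each file as a '### path' heading plus a fenced block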
32
src/init.py
Normal file
@@ -0,0 +1,32 @@
import os
from src.config import Config
from src.logger import Logger


def init_devika():
    logger = Logger()

    logger.info("Initializing Devika...")
    logger.info("checking configurations...")

    config = Config()

    sqlite_db = config.get_sqlite_db()
    screenshots_dir = config.get_screenshots_dir()
    pdfs_dir = config.get_pdfs_dir()
    projects_dir = config.get_projects_dir()
    logs_dir = config.get_logs_dir()

    logger.info("Initializing Prerequisites Jobs...")
    os.makedirs(os.path.dirname(sqlite_db), exist_ok=True)
    os.makedirs(screenshots_dir, exist_ok=True)
    os.makedirs(pdfs_dir, exist_ok=True)
    os.makedirs(projects_dir, exist_ok=True)
    os.makedirs(logs_dir, exist_ok=True)

    from src.bert.sentence import SentenceBert

    logger.info("Loading sentence-transformer BERT models...")
    prompt = "Light-weight keyword extraction exercise for BERT model loading.".strip()
    SentenceBert(prompt).extract_keywords()
    logger.info("BERT model loaded successfully.")
1
src/llm/__init__.py
Normal file
@@ -0,0 +1 @@
from .llm import LLM
26
src/llm/claude_client.py
Normal file
@@ -0,0 +1,26 @@
from anthropic import Anthropic

from src.config import Config

class Claude:
    def __init__(self):
        config = Config()
        api_key = config.get_claude_api_key()
        self.client = Anthropic(
            api_key=api_key,
        )

    def inference(self, model_id: str, prompt: str) -> str:
        message = self.client.messages.create(
            max_tokens=4096,
            messages=[
                {
                    "role": "user",
                    "content": prompt.strip(),
                }
            ],
            model=model_id,
            temperature=0
        )

        return message.content[0].text
33
src/llm/gemini_client.py
Normal file
@@ -0,0 +1,33 @@
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

from src.config import Config

class Gemini:
    def __init__(self):
        config = Config()
        api_key = config.get_gemini_api_key()
        genai.configure(api_key=api_key)

    def inference(self, model_id: str, prompt: str) -> str:
        config = genai.GenerationConfig(temperature=0)
        model = genai.GenerativeModel(model_id, generation_config=config)
        # Set safety settings for the request
        safety_settings = {
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            # You can adjust other categories as needed
        }
        response = model.generate_content(prompt, safety_settings=safety_settings)
        try:
            # Check if the response contains text
            return response.text
        except ValueError:
            # If the response doesn't contain text, check if the prompt was blocked
            print("Prompt feedback:", response.prompt_feedback)
            # Also check the finish reason to see if the response was blocked
            print("Finish reason:", response.candidates[0].finish_reason)
            # If the finish reason was SAFETY, the safety ratings have more details
            print("Safety ratings:", response.candidates[0].safety_ratings)
            # Handle the error or return an appropriate message
            return "Error: Unable to generate content from Gemini API"
24
src/llm/groq_client.py
Normal file
@@ -0,0 +1,24 @@
from groq import Groq as _Groq

from src.config import Config


class Groq:
    def __init__(self):
        config = Config()
        api_key = config.get_groq_api_key()
        self.client = _Groq(api_key=api_key)

    def inference(self, model_id: str, prompt: str) -> str:
        chat_completion = self.client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt.strip(),
                }
            ],
            model=model_id,
            temperature=0
        )

        return chat_completion.choices[0].message.content
149
src/llm/llm.py
Normal file
@@ -0,0 +1,149 @@
import sys

import tiktoken
from typing import List, Tuple

from src.socket_instance import emit_agent
from .ollama_client import Ollama
from .claude_client import Claude
from .openai_client import OpenAi
from .gemini_client import Gemini
from .mistral_client import MistralAi
from .groq_client import Groq

from src.state import AgentState

from src.config import Config
from src.logger import Logger

TIKTOKEN_ENC = tiktoken.get_encoding("cl100k_base")

ollama = Ollama()
logger = Logger()
agentState = AgentState()
config = Config()


class LLM:
    def __init__(self, model_id: str = None):
        self.model_id = model_id
        self.log_prompts = config.get_logging_prompts()
        self.timeout_inference = config.get_timeout_inference()
        self.models = {
            "CLAUDE": [
                ("Claude 3 Opus", "claude-3-opus-20240229"),
                ("Claude 3 Sonnet", "claude-3-sonnet-20240229"),
                ("Claude 3 Haiku", "claude-3-haiku-20240307"),
            ],
            "OPENAI": [
                ("GPT-4o", "gpt-4o"),
                ("GPT-4 Turbo", "gpt-4-turbo"),
                ("GPT-3.5 Turbo", "gpt-3.5-turbo-0125"),
            ],
            "GOOGLE": [
                ("Gemini 1.0 Pro", "gemini-pro"),
                ("Gemini 1.5 Flash", "gemini-1.5-flash"),
                ("Gemini 1.5 Pro", "gemini-1.5-pro"),
            ],
            "MISTRAL": [
                ("Mistral 7b", "open-mistral-7b"),
                ("Mistral 8x7b", "open-mixtral-8x7b"),
                ("Mistral Medium", "mistral-medium-latest"),
                ("Mistral Small", "mistral-small-latest"),
                ("Mistral Large", "mistral-large-latest"),
            ],
            "GROQ": [
                ("LLAMA3 8B", "llama3-8b-8192"),
                ("LLAMA3 70B", "llama3-70b-8192"),
                ("LLAMA2 70B", "llama2-70b-4096"),
                ("Mixtral", "mixtral-8x7b-32768"),
                ("GEMMA 7B", "gemma-7b-it"),
            ],
            "OLLAMA": []
        }
        if ollama.client:
            self.models["OLLAMA"] = [(model["name"], model["name"]) for model in ollama.models]

    def list_models(self) -> dict:
        return self.models

    def model_enum(self, model_name: str) -> Tuple[str, str]:
        model_dict = {
            model[0]: (model_enum, model[1])
            for model_enum, models in self.models.items()
            for model in models
        }
        return model_dict.get(model_name, (None, None))

    @staticmethod
    def update_global_token_usage(string: str, project_name: str):
        token_usage = len(TIKTOKEN_ENC.encode(string))
        agentState.update_token_usage(project_name, token_usage)

        total = agentState.get_latest_token_usage(project_name) + token_usage
        emit_agent("tokens", {"token_usage": total})

    def inference(self, prompt: str, project_name: str) -> str:
        self.update_global_token_usage(prompt, project_name)

        model_enum, model_name = self.model_enum(self.model_id)

        print(f"Model: {self.model_id}, Enum: {model_enum}")
        if model_enum is None:
            raise ValueError(f"Model {self.model_id} not supported")

        model_mapping = {
            "OLLAMA": ollama,
            "CLAUDE": Claude(),
            "OPENAI": OpenAi(),
            "GOOGLE": Gemini(),
            "MISTRAL": MistralAi(),
            "GROQ": Groq()
        }

        try:
            import concurrent.futures
            import time

            start_time = time.time()
            model = model_mapping[model_enum]

            with concurrent.futures.ThreadPoolExecutor() as executor:
                future = executor.submit(model.inference, model_name, prompt)
                try:
                    while True:
                        elapsed_time = time.time() - start_time
                        elapsed_seconds = format(elapsed_time, ".2f")
                        emit_agent("inference", {"type": "time", "elapsed_time": elapsed_seconds})
                        if int(elapsed_time) == 5:
                            emit_agent("inference", {"type": "warning", "message": "Inference is taking longer than expected"})
                        if elapsed_time > self.timeout_inference:
                            raise concurrent.futures.TimeoutError
                        if future.done():
                            break
                        time.sleep(0.5)

                    response = future.result(timeout=self.timeout_inference).strip()

                except concurrent.futures.TimeoutError:
                    logger.error(f"Inference failed: took too long. Model: {model_enum}, Model ID: {self.model_id}")
                    emit_agent("inference", {"type": "error", "message": "Inference took too long. Please try again."})
                    response = False
                    sys.exit()

                except Exception as e:
                    logger.error(str(e))
                    response = False
                    emit_agent("inference", {"type": "error", "message": str(e)})
                    sys.exit()

        except KeyError:
            raise ValueError(f"Model {model_enum} not supported")

        if self.log_prompts:
            logger.debug(f"Response ({model}): --> {response}")

        self.update_global_token_usage(response, project_name)

        return response
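
Example usage (not part of this commit; a minimal sketch assuming a valid Anthropic key in config.toml and a socket context for emit_agent):

# Hypothetical usage example, not part of this commit.
from src.llm import LLM

llm = LLM(model_id="Claude 3 Opus")     # keyed by display name, see self.models
print(llm.list_models()["CLAUDE"])
print(llm.inference("Reply with one word: hello", project_name="demo"))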
22
src/llm/mistral_client.py
Normal file
@@ -0,0 +1,22 @@
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage

from src.config import Config


class MistralAi:
    def __init__(self):
        config = Config()
        api_key = config.get_mistral_api_key()
        self.client = MistralClient(api_key=api_key)

    def inference(self, model_id: str, prompt: str) -> str:
        print("prompt", prompt.strip())
        chat_completion = self.client.chat(
            model=model_id,
            messages=[
                ChatMessage(role="user", content=prompt.strip())
            ],
            temperature=0
        )
        return chat_completion.choices[0].message.content
25
src/llm/ollama_client.py
Normal file
@@ -0,0 +1,25 @@
import ollama
from src.logger import Logger
from src.config import Config

log = Logger()


class Ollama:
    def __init__(self):
        try:
            self.client = ollama.Client(Config().get_ollama_api_endpoint())
            self.models = self.client.list()["models"]
            log.info("Ollama available")
        except Exception:
            self.client = None
            log.warning("Ollama not available")
            log.warning("run ollama server to use ollama models otherwise use API models")

    def inference(self, model_id: str, prompt: str) -> str:
        response = self.client.generate(
            model=model_id,
            prompt=prompt.strip(),
            options={"temperature": 0}
        )
        return response['response']
24
src/llm/openai_client.py
Normal file
@@ -0,0 +1,24 @@
from openai import OpenAI

from src.config import Config


class OpenAi:
    def __init__(self):
        config = Config()
        api_key = config.get_openai_api_key()
        base_url = config.get_openai_api_base_url()
        self.client = OpenAI(api_key=api_key, base_url=base_url)

    def inference(self, model_id: str, prompt: str) -> str:
        chat_completion = self.client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt.strip(),
                }
            ],
            model=model_id,
            temperature=0
        )
        return chat_completion.choices[0].message.content
78
src/logger.py
Normal file
@@ -0,0 +1,78 @@
from functools import wraps

from fastlogging import LogInit
from flask import request

from src.config import Config


class Logger:
    def __init__(self, filename="devika_agent.log"):
        config = Config()
        logs_dir = config.get_logs_dir()
        self.logger = LogInit(pathName=logs_dir + "/" + filename, console=True, colors=True, encoding="utf-8")

    def read_log_file(self) -> str:
        with open(self.logger.pathName, "r") as file:
            return file.read()

    def info(self, message: str):
        self.logger.info(message)
        self.logger.flush()

    def error(self, message: str):
        self.logger.error(message)
        self.logger.flush()

    def warning(self, message: str):
        self.logger.warning(message)
        self.logger.flush()

    def debug(self, message: str):
        self.logger.debug(message)
        self.logger.flush()

    def exception(self, message: str):
        self.logger.exception(message)
        self.logger.flush()


def route_logger(logger: Logger):
    """
    Decorator factory that creates a decorator to log route entry and exit points.
    The decorator uses the provided logger to log the information.

    :param logger: The logger instance to use for logging.
    """

    log_enabled = Config().get_logging_rest_api()

    def decorator(func):

        @wraps(func)
        def wrapper(*args, **kwargs):
            # Log entry point
            if log_enabled:
                logger.info(f"{request.path} {request.method}")

            # Call the actual route function
            response = func(*args, **kwargs)

            from werkzeug.wrappers import Response

            # Log exit point, including response summary if possible
            try:
                if log_enabled:
                    if isinstance(response, Response) and response.direct_passthrough:
                        logger.debug(f"{request.path} {request.method} - Response: File response")
                    else:
                        response_summary = response.get_data(as_text=True)
                        if 'settings' in request.path:
                            response_summary = "*** Settings are not logged ***"
                        logger.debug(f"{request.path} {request.method} - Response: {response_summary}")
            except Exception as e:
                logger.exception(f"{request.path} {request.method} - {e}")

            return response
        return wrapper
    return decorator
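
Example usage of the route_logger decorator factory (not part of this commit; a minimal sketch assuming a Flask app):

# Hypothetical usage example, not part of this commit.
from flask import Flask
from src.logger import Logger, route_logger

app = Flask(__name__)
logger = Logger()

@app.route("/api/status")
@route_logger(logger)   # logs entry and the response body; 'settings' paths are redacted
def status():
    return {"ok": True}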
1
src/memory/__init__.py
Normal file
@@ -0,0 +1 @@
from .knowledge_base import KnowledgeBase
33
src/memory/knowledge_base.py
Normal file
@@ -0,0 +1,33 @@
from typing import Optional
from sqlmodel import Field, Session, SQLModel, create_engine

from src.config import Config

"""
TODO: The tag check should be a BM25 search, it's just a simple equality check now.
"""

class Knowledge(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    tag: str
    contents: str

class KnowledgeBase:
    def __init__(self):
        config = Config()
        sqlite_path = config.get_sqlite_db()
        self.engine = create_engine(f"sqlite:///{sqlite_path}")
        SQLModel.metadata.create_all(self.engine)

    def add_knowledge(self, tag: str, contents: str):
        knowledge = Knowledge(tag=tag, contents=contents)
        with Session(self.engine) as session:
            session.add(knowledge)
            session.commit()

    def get_knowledge(self, tag: str) -> str:
        with Session(self.engine) as session:
            knowledge = session.query(Knowledge).filter(Knowledge.tag == tag).first()
            if knowledge:
                return knowledge.contents
            return None
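
Example usage (not part of this commit; a minimal sketch assuming the SQLITE_DB path from config.toml is writable):

# Hypothetical usage example, not part of this commit.
from src.memory import KnowledgeBase

kb = KnowledgeBase()
kb.add_knowledge(tag="playwright", contents="Start sync_playwright() before launching chromium.")
print(kb.get_knowledge("playwright"))   # exact tag match only (see the BM25 TODO above)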
3
src/memory/rag.py
Normal file
@@ -0,0 +1,3 @@
"""
Vector Search for Code Docs + Docs Loading
"""
Some files were not shown because too many files have changed in this diff.