init devika repo

commit f0b94ab9bd
BIN  .assets/devika-avatar.png  (new file; binary not shown, 911 KiB)
BIN  .assets/devika-pygame-demo.mp4  (new file; binary not shown)
BIN  .assets/devika-screenshot.png  (new file; binary not shown, 747 KiB)
163  .gitignore  (vendored, new file)
@@ -0,0 +1,163 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
config.toml

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

notes.md
data/
251  ARCHITECTURE.md  (new file)
@@ -0,0 +1,251 @@
# Devika Architecture

Devika is an advanced AI software engineer that can understand high-level human instructions, break them down into steps, research relevant information, and write code to achieve a given objective. This document provides a detailed technical overview of Devika's system architecture and how the various components work together.

## Table of Contents

1. [Overview](#overview)
2. [Agent Core](#agent-core)
3. [Agents](#agents)
   - [Planner](#planner)
   - [Researcher](#researcher)
   - [Coder](#coder)
   - [Action](#action)
   - [Runner](#runner)
   - [Feature](#feature)
   - [Patcher](#patcher)
   - [Reporter](#reporter)
   - [Decision](#decision)
4. [Language Models](#language-models)
5. [Browser Interaction](#browser-interaction)
6. [Project Management](#project-management)
7. [Agent State Management](#agent-state-management)
8. [Services](#services)
9. [Utilities](#utilities)
10. [Conclusion](#conclusion)

## Overview

At a high level, Devika consists of the following key components:

- **Agent Core**: Orchestrates the overall AI planning, reasoning, and execution process. Communicates with the various sub-agents.
- **Agents**: Specialized sub-agents that handle specific tasks like planning, research, coding, patching, and reporting.
- **Language Models**: Leverages large language models (LLMs) like Claude, GPT-4, and GPT-3 for natural language understanding and generation.
- **Browser Interaction**: Enables web browsing, information gathering, and interaction with web elements.
- **Project Management**: Handles organization and persistence of project-related data.
- **Agent State Management**: Tracks and persists the dynamic state of the AI agent across interactions.
- **Services**: Integrations with external services like GitHub and Netlify for enhanced capabilities.
- **Utilities**: Supporting modules for configuration, logging, vector search, PDF generation, etc.

Let's dive into each of these components in more detail.

## Agent Core

The `Agent` class serves as the central engine that drives Devika's AI planning and execution loop. Here's how it works (a minimal sketch follows this list):

1. When a user provides a high-level prompt, the `execute` method is invoked on the Agent.
2. The prompt is first passed to the Planner agent to generate a step-by-step plan.
3. The Researcher agent then takes this plan and extracts relevant search queries and context.
4. The Agent performs web searches using the Bing Search API and crawls the top results.
5. The raw crawled content is passed through the Formatter agent to extract clean, relevant information.
6. This researched context, along with the step-by-step plan, is fed to the Coder agent to generate code.
7. The generated code is saved to the project directory on disk.
8. If the user interacts further with a follow-up prompt, the `subsequent_execute` method is invoked.
9. The Action agent determines the appropriate action to take based on the user's message (run code, deploy, write tests, add feature, fix bug, write report, etc.).
10. The corresponding specialized agent is invoked to perform the action (Runner, Feature, Patcher, Reporter).
11. Results are communicated back to the user and the project files are updated.

Throughout this process, the Agent Core is responsible for:
- Managing conversation history and project-specific context
- Updating agent state and internal monologue
- Accumulating context keywords across agent prompts
- Emulating the "thinking" process of the AI through timed agent state updates
- Handling special commands through the Decision agent (e.g. git clone, browser interaction session)

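The sketch below is a minimal, runnable illustration of this loop with stand-in stubs for each sub-agent; it is not the actual `Agent.execute` implementation, and the function signatures are assumptions based on the steps above.

```python
# A simplified illustration of the execution loop described above; every
# sub-agent here is a stand-in stub so the control flow can run end to end.
def planner(prompt: str) -> list[str]:
    return [f"step 1 for: {prompt}", "step 2"]           # stand-in plan

def researcher(plan: list[str]) -> list[str]:
    return ["python pygame tutorial"]                    # stand-in queries

def search_and_crawl(query: str) -> str:
    return f"<raw page text for '{query}'>"              # stand-in search/crawl

def formatter(raw_pages: list[str]) -> str:
    return "\n".join(raw_pages)                          # stand-in cleanup

def coder(plan: list[str], context: str) -> dict[str, str]:
    return {"main.py": "print('hello')"}                 # stand-in codegen

def execute(prompt: str) -> dict[str, str]:
    plan = planner(prompt)                               # 1-2: plan the task
    queries = researcher(plan)                           # 3: derive queries
    raw = [search_and_crawl(q) for q in queries]         # 4: search and crawl
    context = formatter(raw)                             # 5: clean the content
    return coder(plan, context)                          # 6: generate code

print(execute("make a snake game"))
```
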
## Agents

Devika's cognitive abilities are powered by a collection of specialized sub-agents. Each agent is implemented as a separate Python class. Agents communicate with the underlying LLMs through prompt templates defined in Jinja2 format. Key agents include:

### Planner
- Generates a high-level step-by-step plan based on the user's prompt
- Extracts the focus area and provides a summary
- Uses few-shot prompting to provide examples of the expected response format

### Researcher
- Takes the generated plan and extracts relevant search queries
- Ranks and filters queries based on relevance and specificity
- Prompts the user for additional context if required
- Aims to maximize information gain while minimizing the number of searches

### Coder
- Generates code based on the step-by-step plan and researched context
- Segments code into appropriate files and directories
- Includes informative comments and documentation
- Handles a variety of languages and frameworks
- Validates code syntax and style

### Action
- Determines the appropriate action to take based on the user's follow-up prompt
- Maps user intent to a specific action keyword (run, test, deploy, fix, implement, report)
- Provides a human-like confirmation of the action to the user

### Runner
- Executes the written code in a sandboxed environment
- Handles different OS environments (Mac, Linux, Windows)
- Streams command output to the user in real time
- Gracefully handles errors and exceptions

### Feature
- Implements a new feature based on the user's specification
- Modifies existing project files while maintaining code structure and style
- Performs incremental testing to verify the feature works as expected

### Patcher
- Debugs and fixes issues based on the user's description or error message
- Analyzes existing code to identify potential root causes
- Suggests and implements a fix, with an explanation of the changes made

### Reporter
- Generates a comprehensive report summarizing the project
- Includes a high-level overview, technical design, setup instructions, API docs, etc.
- Formats the report in a clean, readable structure with a table of contents
- Exports the report as a PDF document

### Decision
- Handles special command-like instructions that don't fit other agents
- Maps commands to specific functions (git clone, browser interaction, etc.)
- Executes the corresponding function with the provided arguments

Each agent follows a common pattern (see the sketch after this list):
1. Prepare a prompt by rendering the Jinja2 template with the current context
2. Query the LLM to get a response based on the prompt
3. Validate and parse the LLM's response to extract structured output
4. Perform any additional processing or side effects (e.g. save to disk)
5. Return the result to the Agent Core for further action

Agents aim to be stateless and idempotent where possible. State and history are managed by the Agent Core and passed into the agents as needed. This allows for a modular, composable design.

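A rough sketch of this pattern, assuming Jinja2 templating and a stand-in LLM callable; the names here are illustrative, not Devika's actual agent API.

```python
# An illustrative sketch of the common agent pattern described above.
from jinja2 import Template

PROMPT = Template("Plan the following task step by step:\n{{ task }}")

def fake_llm(prompt: str) -> str:
    return "1. do X\n2. do Y"                      # stand-in for a real LLM call

def validate(response: str) -> list[str]:
    steps = [line for line in response.splitlines() if line.strip()]
    if not steps:
        raise ValueError("LLM returned no parsable steps")
    return steps

def run_planner(task: str) -> list[str]:
    prompt = PROMPT.render(task=task)              # 1. render the template
    response = fake_llm(prompt)                    # 2. query the LLM
    return validate(response)                      # 3-5. validate, parse, return

print(run_planner("build a todo app"))
```
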
## Language Models

Devika's natural language processing capabilities are driven by state-of-the-art LLMs. The `LLM` class provides a unified interface to interact with different language models:

- **Claude** (Anthropic): Claude models like claude-v1.3, claude-instant-v1.0, etc.
- **GPT-4/GPT-3** (OpenAI): Models like gpt-4, gpt-3.5-turbo, etc.
- **Self-hosted models** (via [Ollama](https://ollama.com/)): Allows using open-source models in a self-hosted environment

The `LLM` class abstracts away the specifics of each provider's API, allowing agents to interact with the models in a consistent way. It supports:
- Listing available models
- Generating completions based on a prompt
- Tracking and accumulating token usage over time

Choosing the right model for a given use case depends on factors like desired quality, speed, and cost. The modular design allows swapping out models easily.

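A hedged sketch of such a unified interface follows; `list_models` is called in `devika.py`, while the rest of the shape shown here is illustrative rather than the actual `src.llm` code.

```python
# An illustrative sketch of a unified LLM interface with per-call token
# accounting; the provider dispatch is stubbed out for brevity.
class LLM:
    def __init__(self, model_id: str):
        self.model_id = model_id
        self.total_tokens = 0

    def list_models(self) -> list[str]:
        return ["claude-3-opus", "gpt-4", "gpt-3.5-turbo"]   # example IDs

    def inference(self, prompt: str) -> str:
        # a real implementation would dispatch to the right provider client
        response = f"[{self.model_id}] completion for: {prompt[:30]}..."
        self.total_tokens += len(prompt.split()) + len(response.split())
        return response

llm = LLM("gpt-4")
print(llm.inference("Write a haiku about compilers"))
print("accumulated tokens:", llm.total_tokens)
```
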
## Browser Interaction

Devika can interact with webpages in an automated fashion to gather information and perform actions. This is powered by the `Browser` and `Crawler` classes.

The `Browser` class uses Playwright to provide high-level web automation primitives:
- Spawning a browser instance (Chromium)
- Navigating to a URL
- Querying DOM elements
- Extracting page content as text, Markdown, PDF, etc.
- Taking a screenshot of the page

The `Crawler` class defines an agent that can interact with a webpage based on natural language instructions. It leverages:
- Pre-defined browser actions like scroll, click, type, etc.
- A prompt template that provides examples of how to use these actions
- An LLM to determine the best action to take based on the current page content and objective

The `start_interaction` function sets up a loop where:
1. The current page content and objective are passed to the LLM
2. The LLM returns the next best action to take (e.g. "CLICK 12" or "TYPE 7 machine learning")
3. The Crawler executes this action on the live page
4. The process repeats from the updated page state

This allows performing a sequence of actions to achieve a higher-level objective (e.g. research a topic, fill out a form, or interact with an app).

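The sketch below illustrates the shape of this loop with stand-in page and LLM objects; the action strings follow the examples above, but none of this is the real `Crawler` API.

```python
# An illustrative sketch of the start_interaction loop described above.
def fake_llm(page_content: str, objective: str) -> str:
    return "CLICK 12"                                  # stand-in decision

def apply_action(action: str, page: dict) -> dict:
    verb, _, rest = action.partition(" ")
    page["history"].append((verb, rest))               # "execute" on the page
    return page

def start_interaction(objective: str, max_steps: int = 3) -> dict:
    page = {"content": "<initial page>", "history": []}
    for _ in range(max_steps):
        action = fake_llm(page["content"], objective)  # 1-2: ask for next action
        page = apply_action(action, page)              # 3: perform it
        page["content"] = "<updated page>"             # 4: repeat from new state
    return page

print(start_interaction("research machine learning"))
```
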
## Project Management

The `ProjectManager` class is responsible for creating, updating, and querying projects and their associated metadata. Key functions include:

- Creating a new project and initializing its directory structure
- Deleting a project and its associated files
- Adding a message to a project's conversation history
- Retrieving messages for a given project
- Getting the latest user/AI message in a conversation
- Listing all projects
- Zipping a project's files for export

Project metadata is persisted in a SQLite database using SQLModel. The `Projects` table stores:
- Project name
- JSON-serialized conversation history

This allows the agent to work on multiple projects simultaneously and retain conversation history across sessions.

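A minimal SQLModel sketch of such a table is shown below; the column names are assumptions based on this description, not the actual schema.

```python
# An illustrative Projects table: project name plus JSON-serialized history.
from typing import Optional
from sqlmodel import Field, SQLModel, Session, create_engine

class Projects(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    project: str                     # project name
    message_stack_json: str          # JSON-serialized conversation history

engine = create_engine("sqlite://")  # in-memory DB for illustration
SQLModel.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Projects(project="demo", message_stack_json="[]"))
    session.commit()
```
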
## Agent State Management

As the AI agent works on a task, we need to track and display its internal state to the user. The `AgentState` class handles this by providing an interface to:

- Initialize a new agent state
- Add a state to the current sequence of states for a project
- Update the latest state for a project
- Query the latest state or the entire state history for a project
- Mark the agent as active/inactive or the task as completed

Agent state includes information like:
- The current step or action being executed
- An internal monologue reflecting the agent's current "thoughts"
- Browser interactions (URL visited, screenshot)
- Terminal interactions (command executed, output)
- Token usage so far

Like projects, agent states are also persisted in the SQLite DB using SQLModel. The `AgentStateModel` table stores:
- Project name
- JSON-serialized list of states

Having a persistent log of agent states is useful for:
- Providing real-time visibility to the user
- Auditing and debugging agent behavior
- Resuming from interruptions or failures

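For illustration, a single state entry might look like the dictionary below; the `agent_is_active`, `completed`, `browser_session`, and `terminal_session` keys appear in `devika.py`, while the remaining field names are assumptions based on this list.

```python
# An illustrative agent-state entry, one element of the per-project state list.
state = {
    "step": "Writing code for step 3 of the plan",
    "internal_monologue": "I need to wire the game loop to the renderer...",
    "browser_session": {"url": "https://docs.python.org", "screenshot": "shot.png"},
    "terminal_session": {"command": "python main.py", "output": "Hello"},
    "token_usage": 1432,
    "agent_is_active": True,
    "completed": False,
}
print(state["internal_monologue"])
```
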
## Services

Devika integrates with external services to augment its capabilities:

- **GitHub**: Performing git operations like clone/pull, and listing repos, commits, files, etc.
- **Netlify**: Deploying web apps and sites seamlessly

The `GitHub` and `Netlify` classes provide lightweight wrappers around the respective service APIs. They handle authentication, making HTTP requests, and parsing responses.

This allows Devika to perform actions like:
- Cloning a repo given a GitHub URL
- Listing a user's GitHub repos
- Creating a new Netlify site
- Deploying a directory to Netlify
- Providing the deployed site URL to the user

Integrations are done in a modular way so that new services can be added easily.

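As a hedged sketch of what such a wrapper can look like (this is illustrative, not Devika's actual `src/services` code; it assumes Netlify's public REST API base URL and its site-creation endpoint):

```python
# An illustrative lightweight service wrapper: authentication header plus a
# thin method over one HTTP endpoint.
import requests

class Netlify:
    BASE = "https://api.netlify.com/api/v1"   # assumed public REST API base

    def __init__(self, api_key: str):
        self.headers = {"Authorization": f"Bearer {api_key}"}

    def create_site(self) -> dict:
        # creates a new site; the parsed response includes the site URL
        resp = requests.post(f"{self.BASE}/sites", headers=self.headers)
        resp.raise_for_status()
        return resp.json()

# usage: Netlify(api_key="...").create_site()["url"]
```
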
## Utilities

Devika makes use of several utility modules to support its functioning:

- `Config`: Loads and provides access to configuration settings (API keys, folder paths, etc.)
- `Logger`: Sets up logging to console and file, with support for log levels and colors
- `ReadCode`: Recursively reads code files in a directory and converts them into a Markdown format
- `SentenceBERT`: Extracts keywords and semantic information from text using SentenceBERT embeddings
- `Experts`: A collection of domain-specific knowledge bases to assist in certain areas (e.g. webdev, physics, chemistry, math)

The utility modules aim to provide reusable functionality that is used across different parts of the system.

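For instance, a minimal sketch of the described `ReadCode` behavior might look like the following; the class name comes from the list above, while the method name and the Python-only file filter are assumptions.

```python
# An illustrative ReadCode: recursively read code files and emit Markdown.
from pathlib import Path

class ReadCode:
    def __init__(self, directory: str):
        self.directory = Path(directory)

    def code_set_to_markdown(self) -> str:
        chunks = []
        for path in sorted(self.directory.rglob("*.py")):
            chunks.append(f"### `{path}`\n\n```python\n{path.read_text()}\n```")
        return "\n\n".join(chunks)

print(ReadCode(".").code_set_to_markdown())
```
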
## Conclusion

Devika is a complex system that combines multiple AI and automation techniques to deliver an intelligent programming assistant. Key design principles include:

- Modularity: Breaking down functionality into specialized agents and services
- Flexibility: Supporting different LLMs, services, and domains in a pluggable fashion
- Persistence: Storing project and agent state in a DB to enable pause/resume and auditing
- Transparency: Surfacing the agent's thought process and interactions to the user in real time

By understanding how the different components work together, we can extend, optimize, and scale Devika to take on increasingly sophisticated software engineering tasks. The agent-based architecture provides a strong foundation to build more advanced AI capabilities in the future.
38  CONTRIBUTING.md  (new file)
@@ -0,0 +1,38 @@
# Welcome Contributors
We welcome contributions to enhance Devika's capabilities and improve its performance. To report bugs, create a [GitHub issue](https://github.com/stitionai/devika/issues).

> Before contributing, read through the existing issues and pull requests to see if someone else is already working on something similar. That way you can avoid duplicating efforts.

To contribute, please follow these steps:

1. Fork the Devika repository on GitHub.
2. Create a new branch for your feature or bug fix.
3. Make your changes and ensure that the code passes all tests.
4. Submit a pull request describing your changes and their benefits.

### Pull Request Guidelines
When submitting a pull request, please follow these guidelines:

1. **Title**: Please include one of the following prefixes:
   - `Feature:` for new features
   - `Fix:` for bug fixes
   - `Docs:` for documentation changes
   - `Refactor:` for code refactoring
   - `Improve:` for performance improvements
   - `Other:` for other changes

   For example:
   - `Feature: added new feature to the code`
   - `Fix: fixed the bug in the code`

2. **Description**: Provide a clear and detailed description of your changes in the pull request. Explain the problem you are solving, the approach you took, and any potential side effects or limitations of your changes.
3. **Documentation**: Update the relevant documentation to reflect your changes. This includes the README file, code comments, and any other relevant documentation.
4. **Dependencies**: If your changes require new dependencies, ensure that they are properly documented and added to the `requirements.txt` or `package.json` files.
5. If the pull request does not meet the above guidelines, it may be closed without merging.

**Note**: Please ensure that you have the latest version of the code before creating a pull request. If you have an existing fork, just sync your fork with the latest version of the Devika repository.

Please adhere to the coding conventions, maintain clear documentation, and provide thorough testing for your contributions.
21  LICENSE  (new file)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 stition

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
33  Makefile  (new file)
@@ -0,0 +1,33 @@
.PHONY: setup deps compose-up compose-down compose-destroy

# to check if docker and docker-compose are installed on the machine
DOCKER := $(shell command -v docker)
DOCKER_COMPOSE := $(shell command -v docker-compose)

deps:
ifndef DOCKER
	@echo "Docker is not available. Please install docker"
	@echo "try running sudo apt-get install docker"
	@exit 1
endif
ifndef DOCKER_COMPOSE
	@echo "docker-compose is not available. Please install docker-compose"
	@echo "try running sudo apt-get install docker-compose"
	@exit 1
endif

setup:
	sh +x build

compose-down: deps
	docker volume ls
	docker-compose ps
	docker images
	docker-compose down;

compose-up: deps compose-down
	docker-compose up --build

compose-destroy: deps
	docker images | grep -i devika | awk '{print $$3}' | xargs docker rmi -f
	docker volume prune
183  README.md  (new file)
@@ -0,0 +1,183 @@
<p align="center">
  <img src=".assets/devika-avatar.png" alt="Devika Logo" width="250">
</p>

<h1 align="center">🚀 Devika - Agentic AI Software Engineer 👩‍💻</h1>

![devika screenshot](.assets/devika-screenshot.png)

> [!IMPORTANT]
> This project is currently in a very early development/experimental stage. There are a lot of unimplemented/broken features at the moment. Contributions are welcome to help out with the progress!

## Table of Contents

- [About](#about)
- [Key Features](#key-features)
- [System Architecture](#system-architecture)
- [Getting Started](#getting-started)
  - [Requirements](#requirements)
  - [Installation](#installation)
  - [How to use](#how-to-use)
- [Configuration](#configuration)
- [Contributing](#contributing)
- [Help and Support](#help-and-support)
- [License](#license)

## About

Devika is an advanced AI software engineer that can understand high-level human instructions, break them down into steps, research relevant information, and write code to achieve the given objective. Devika utilizes large language models, planning and reasoning algorithms, and web browsing abilities to intelligently develop software.

Devika aims to revolutionize the way we build software by providing an AI pair programmer who can take on complex coding tasks with minimal human guidance. Whether you need to create a new feature, fix a bug, or develop an entire project from scratch, Devika is here to assist you.

> [!NOTE]
> Devika is modeled after [Devin](https://www.cognition-labs.com/introducing-devin) by Cognition AI. This project aims to be an open-source alternative to Devin with an "overly ambitious" goal to meet the same score as Devin in the [SWE-bench](https://www.swebench.com/) benchmarks... and eventually beat it?

## Demos

https://github.com/stitionai/devika/assets/26198477/cfed6945-d53b-4189-9fbe-669690204206

## Key Features

- 🤖 Supports **Claude 3**, **GPT-4**, **Gemini**, **Mistral**, **Groq** and **Local LLMs** via [Ollama](https://ollama.com). For optimal performance, use the **Claude 3** family of models.
- 🧠 Advanced AI planning and reasoning capabilities
- 🔍 Contextual keyword extraction for focused research
- 🌐 Seamless web browsing and information gathering
- 💻 Code writing in multiple programming languages
- 📊 Dynamic agent state tracking and visualization
- 💬 Natural language interaction via chat interface
- 📂 Project-based organization and management
- 🔌 Extensible architecture for adding new features and integrations

## System Architecture

Read [**ARCHITECTURE.md**](docs/architecture/ARCHITECTURE.md) for the detailed documentation.

## Getting Started

### Requirements
```
Version requirements
- Python >= 3.10 and < 3.12
- NodeJs >= 18
- bun
```

- Install uv - Python package manager: [download](https://github.com/astral-sh/uv)
- Install bun - JavaScript runtime: [download](https://bun.sh/docs/installation)
- For Ollama, follow the [ollama setup guide](docs/Installation/ollama.md) (optional: you can skip this step if you don't want to use local models)
- For API models, configure the API keys via the settings page in the UI.

### Installation

To install Devika, follow these steps:

1. Clone the Devika repository:
```bash
git clone https://github.com/stitionai/devika.git
```
2. Navigate to the project directory:
```bash
cd devika
```
3. Create a virtual environment and install the required dependencies (you can use any virtual environment manager):
```bash
uv venv

# On macOS and Linux.
source .venv/bin/activate

# On Windows.
.venv\Scripts\activate

uv pip install -r requirements.txt
```
4. Install Playwright for browsing capabilities:
```bash
playwright install --with-deps  # installs Playwright's browsers (and their deps) if required
```
5. Start the Devika server:
```bash
python devika.py
```
6. If everything is working fine, you should see the following output:
```bash
root: INFO   : Devika is up and running!
```
7. Now, for the frontend, open a new terminal and navigate to the `ui` directory:
```bash
cd ui/
bun install
bun run start
```
8. Access the Devika web interface by opening a browser and navigating to `http://127.0.0.1:3001`

### How to use

To start using Devika, follow these steps:

1. Open the Devika web interface in your browser.
2. To create a project, click on 'select project' and then click on 'new project'.
3. Select the search engine and model configuration for your project.
4. In the chat interface, provide a high-level objective or task description for Devika to work on.
5. Devika will process your request, break it down into steps, and start working on the task.
6. Monitor Devika's progress, view generated code, and provide additional guidance or feedback as needed.
7. Once Devika completes the task, review the generated code and project files.
8. Iterate and refine the project as desired by providing further instructions or modifications.

## Configuration

Devika requires certain configuration settings and API keys to function properly:

When you run Devika for the first time, it will create a `config.toml` file for you in the root directory. You can configure the following settings in the settings page via the UI:

- API KEYS
  - `BING`: Your Bing Search API key for web searching capabilities.
  - `GOOGLE_SEARCH`: Your Google Search API key for web searching capabilities.
  - `GOOGLE_SEARCH_ENGINE_ID`: Your Google Search Engine ID for web searching using Google.
  - `OPENAI`: Your OpenAI API key for accessing GPT models.
  - `GEMINI`: Your Gemini API key for accessing Gemini models.
  - `CLAUDE`: Your Anthropic API key for accessing Claude models.
  - `MISTRAL`: Your Mistral API key for accessing Mistral models.
  - `GROQ`: Your Groq API key for accessing Groq models.
  - `NETLIFY`: Your Netlify API key for deploying and managing web projects.

- API_ENDPOINTS
  - `BING`: The Bing API endpoint for web searching.
  - `GOOGLE`: The Google API endpoint for web searching.
  - `OLLAMA`: The Ollama API endpoint for accessing local LLMs.
  - `OPENAI`: The OpenAI API endpoint for accessing OpenAI models.

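As an illustration, the sketch below reads and updates these settings programmatically through the `Config` class used in `devika.py` (its `get_config` and `update_config` methods appear there); the nested key layout shown is an assumption based on the list above, and the snippet must be run from the repository root.

```python
# A minimal sketch of programmatic access to the settings listed above.
from src.config import Config

config = Config()
settings = config.get_config()             # parsed contents of config.toml
print(settings["API_KEYS"]["OPENAI"])      # assumed key layout, see note above

config.update_config({"API_KEYS": {"OPENAI": "sk-..."}})  # persist a new key
```
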
Make sure to keep your API keys secure and do not share them publicly. For setting up the Bing and Google search API keys, follow the instructions in the [search engine setup](docs/Installation/search_engine.md).

## Contributing

We welcome contributions to enhance Devika's capabilities and improve its performance. To contribute, please see the [`CONTRIBUTING.md`](CONTRIBUTING.md) file for steps.

## Help and Support

If you have any questions, feedback, or suggestions, please feel free to reach out to us. You can raise an issue in the [issue tracker](https://github.com/stitionai/devika/issues) or join the [discussions](https://github.com/stitionai/devika/discussions) for general discussions.

We also have a Discord server for the Devika community, where you can connect with other users, share your experiences, ask questions, and collaborate on the project. To join the Devika community Discord server, [click here](https://discord.gg/CYRp43878y).

## License

Devika is released under the [MIT License](https://opensource.org/licenses/MIT). See the `LICENSE` file for more information.

## Star History

<div align="center">
  <a href="https://star-history.com/#stitionai/devika&Date">
    <picture>
      <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=stitionai/devika&type=Date&theme=dark" />
      <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=stitionai/devika&type=Date" />
      <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=stitionai/devika&type=Date" />
    </picture>
  </a>
</div>

---

We hope you find Devika to be a valuable tool in your software development journey. If you have any questions, feedback, or suggestions, please don't hesitate to reach out. Happy coding with Devika!
7  ROADMAP.md  (new file)
@@ -0,0 +1,7 @@
# Roadmap

- [ ] Create an extensive testing suite for all [Agents](https://github.com/stitionai/devika/tree/main/src/agents).
- [ ] Track down all runtime errors and prepare for the stable release of Project Devika.
- [ ] Document and implement easy cross-platform installation/setup scripts and packages.
- [ ] Create tutorial videos on the installation steps, setup, and usage for Windows, Linux, and macOS.
- [ ] Focusing on the Claude 3 Opus model, test Devika on the [SWE-bench](https://www.swebench.com/) benchmarks.
29  app.dockerfile  (new file)
@@ -0,0 +1,29 @@
FROM debian:12

# set up the build variable
ARG VITE_API_BASE_URL
ENV VITE_API_BASE_URL=${VITE_API_BASE_URL}

# set up the OS environment
USER root
WORKDIR /home/nonroot/client
RUN groupadd -r nonroot && useradd -r -g nonroot -d /home/nonroot/client -s /bin/bash nonroot

# install Node.js
RUN apt-get update && apt-get upgrade -y
RUN apt-get install -y build-essential software-properties-common curl sudo wget git
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -
RUN apt-get install -y nodejs

# copy the Devika app client only
COPY ui /home/nonroot/client/ui
COPY src /home/nonroot/client/src
COPY config.toml /home/nonroot/client/

RUN cd ui && npm install && npm install -g npm && npm install -g bun
RUN chown -R nonroot:nonroot /home/nonroot/client

USER nonroot
WORKDIR /home/nonroot/client/ui

ENTRYPOINT [ "npx", "bun", "run", "dev", "--", "--host" ]
0  benchmarks/BENCHMARKS.md  (new file, empty)

1  benchmarks/SWE-bench.md  (new file)
@@ -0,0 +1 @@
> ...Not yet
38  devika.dockerfile  (new file)
@@ -0,0 +1,38 @@
FROM debian:12

# set up the OS environment
USER root
WORKDIR /home/nonroot/devika
RUN groupadd -r nonroot && useradd -r -g nonroot -d /home/nonroot/devika -s /bin/bash nonroot

ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

# set up python3
RUN apt-get update && apt-get upgrade -y
RUN apt-get install -y build-essential software-properties-common curl sudo wget git
RUN apt-get install -y python3 python3-pip
RUN curl -fsSL https://astral.sh/uv/install.sh | sudo -E bash -
RUN $HOME/.cargo/bin/uv venv
ENV PATH="/home/nonroot/devika/.venv/bin:$HOME/.cargo/bin:$PATH"

# copy the Devika python engine only
RUN $HOME/.cargo/bin/uv venv
COPY requirements.txt /home/nonroot/devika/
RUN UV_HTTP_TIMEOUT=100000 $HOME/.cargo/bin/uv pip install -r requirements.txt

RUN playwright install-deps chromium
RUN playwright install chromium

COPY src /home/nonroot/devika/src
COPY config.toml /home/nonroot/devika/
COPY sample.config.toml /home/nonroot/devika/
COPY devika.py /home/nonroot/devika/
RUN chown -R nonroot:nonroot /home/nonroot/devika

USER nonroot
WORKDIR /home/nonroot/devika
ENV PATH="/home/nonroot/devika/.venv/bin:$HOME/.cargo/bin:$PATH"
RUN mkdir /home/nonroot/devika/db

ENTRYPOINT [ "python3", "-m", "devika" ]
209  devika.py  (new file)
@@ -0,0 +1,209 @@
"""
|
||||
DO NOT REARRANGE THE ORDER OF THE FUNCTION CALLS AND VARIABLE DECLARATIONS
|
||||
AS IT MAY CAUSE IMPORT ERRORS AND OTHER ISSUES
|
||||
"""
|
||||
from gevent import monkey
|
||||
monkey.patch_all()
|
||||
from src.init import init_devika
|
||||
init_devika()
|
||||
|
||||
|
||||
from flask import Flask, request, jsonify, send_file
|
||||
from flask_cors import CORS
|
||||
from src.socket_instance import socketio, emit_agent
|
||||
import os
|
||||
import logging
|
||||
from threading import Thread
|
||||
import tiktoken
|
||||
|
||||
from src.apis.project import project_bp
|
||||
from src.config import Config
|
||||
from src.logger import Logger, route_logger
|
||||
from src.project import ProjectManager
|
||||
from src.state import AgentState
|
||||
from src.agents import Agent
|
||||
from src.llm import LLM
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app, resources={r"/*": {"origins": # Change the origin to your frontend URL
|
||||
[
|
||||
"https://localhost:3000",
|
||||
"http://localhost:3000",
|
||||
]}})
|
||||
app.register_blueprint(project_bp)
|
||||
socketio.init_app(app)
|
||||
|
||||
|
||||
log = logging.getLogger("werkzeug")
|
||||
log.disabled = True
|
||||
|
||||
|
||||
TIKTOKEN_ENC = tiktoken.get_encoding("cl100k_base")
|
||||
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
|
||||
manager = ProjectManager()
|
||||
AgentState = AgentState()
|
||||
config = Config()
|
||||
logger = Logger()
|
||||
|
||||
|
||||
# initial socket
|
||||
@socketio.on('socket_connect')
|
||||
def test_connect(data):
|
||||
print("Socket connected :: ", data)
|
||||
emit_agent("socket_response", {"data": "Server Connected"})
|
||||
|
||||
|
||||
@app.route("/api/data", methods=["GET"])
|
||||
@route_logger(logger)
|
||||
def data():
|
||||
project = manager.get_project_list()
|
||||
models = LLM().list_models()
|
||||
search_engines = ["Bing", "Google", "DuckDuckGo"]
|
||||
return jsonify({"projects": project, "models": models, "search_engines": search_engines})
|
||||
|
||||
|
||||
@app.route("/api/messages", methods=["POST"])
|
||||
def get_messages():
|
||||
data = request.json
|
||||
project_name = data.get("project_name")
|
||||
messages = manager.get_messages(project_name)
|
||||
return jsonify({"messages": messages})
|
||||
|
||||
|
||||
# Main socket
|
||||
@socketio.on('user-message')
|
||||
def handle_message(data):
|
||||
logger.info(f"User message: {data}")
|
||||
message = data.get('message')
|
||||
base_model = data.get('base_model')
|
||||
project_name = data.get('project_name')
|
||||
search_engine = data.get('search_engine').lower()
|
||||
|
||||
agent = Agent(base_model=base_model, search_engine=search_engine)
|
||||
|
||||
state = AgentState.get_latest_state(project_name)
|
||||
if not state:
|
||||
thread = Thread(target=lambda: agent.execute(message, project_name))
|
||||
thread.start()
|
||||
else:
|
||||
if AgentState.is_agent_completed(project_name):
|
||||
thread = Thread(target=lambda: agent.subsequent_execute(message, project_name))
|
||||
thread.start()
|
||||
else:
|
||||
emit_agent("info", {"type": "warning", "message": "previous agent doesn't completed it's task."})
|
||||
last_state = AgentState.get_latest_state(project_name)
|
||||
if last_state["agent_is_active"] or not last_state["completed"]:
|
||||
thread = Thread(target=lambda: agent.execute(message, project_name))
|
||||
thread.start()
|
||||
else:
|
||||
thread = Thread(target=lambda: agent.subsequent_execute(message, project_name))
|
||||
thread.start()
|
||||
|
||||
@app.route("/api/is-agent-active", methods=["POST"])
|
||||
@route_logger(logger)
|
||||
def is_agent_active():
|
||||
data = request.json
|
||||
project_name = data.get("project_name")
|
||||
is_active = AgentState.is_agent_active(project_name)
|
||||
return jsonify({"is_active": is_active})
|
||||
|
||||
|
||||
@app.route("/api/get-agent-state", methods=["POST"])
|
||||
@route_logger(logger)
|
||||
def get_agent_state():
|
||||
data = request.json
|
||||
project_name = data.get("project_name")
|
||||
agent_state = AgentState.get_latest_state(project_name)
|
||||
return jsonify({"state": agent_state})
|
||||
|
||||
|
||||
@app.route("/api/get-browser-snapshot", methods=["GET"])
|
||||
@route_logger(logger)
|
||||
def browser_snapshot():
|
||||
snapshot_path = request.args.get("snapshot_path")
|
||||
return send_file(snapshot_path, as_attachment=True)
|
||||
|
||||
|
||||
@app.route("/api/get-browser-session", methods=["GET"])
|
||||
@route_logger(logger)
|
||||
def get_browser_session():
|
||||
project_name = request.args.get("project_name")
|
||||
agent_state = AgentState.get_latest_state(project_name)
|
||||
if not agent_state:
|
||||
return jsonify({"session": None})
|
||||
else:
|
||||
browser_session = agent_state["browser_session"]
|
||||
return jsonify({"session": browser_session})
|
||||
|
||||
|
||||
@app.route("/api/get-terminal-session", methods=["GET"])
|
||||
@route_logger(logger)
|
||||
def get_terminal_session():
|
||||
project_name = request.args.get("project_name")
|
||||
agent_state = AgentState.get_latest_state(project_name)
|
||||
if not agent_state:
|
||||
return jsonify({"terminal_state": None})
|
||||
else:
|
||||
terminal_state = agent_state["terminal_session"]
|
||||
return jsonify({"terminal_state": terminal_state})
|
||||
|
||||
|
||||
@app.route("/api/run-code", methods=["POST"])
|
||||
@route_logger(logger)
|
||||
def run_code():
|
||||
data = request.json
|
||||
project_name = data.get("project_name")
|
||||
code = data.get("code")
|
||||
# TODO: Implement code execution logic
|
||||
return jsonify({"message": "Code execution started"})
|
||||
|
||||
|
||||
@app.route("/api/calculate-tokens", methods=["POST"])
|
||||
@route_logger(logger)
|
||||
def calculate_tokens():
|
||||
data = request.json
|
||||
prompt = data.get("prompt")
|
||||
tokens = len(TIKTOKEN_ENC.encode(prompt))
|
||||
return jsonify({"token_usage": tokens})
|
||||
|
||||
|
||||
@app.route("/api/token-usage", methods=["GET"])
|
||||
@route_logger(logger)
|
||||
def token_usage():
|
||||
project_name = request.args.get("project_name")
|
||||
token_count = AgentState.get_latest_token_usage(project_name)
|
||||
return jsonify({"token_usage": token_count})
|
||||
|
||||
|
||||
@app.route("/api/logs", methods=["GET"])
|
||||
def real_time_logs():
|
||||
log_file = logger.read_log_file()
|
||||
return jsonify({"logs": log_file})
|
||||
|
||||
|
||||
@app.route("/api/settings", methods=["POST"])
|
||||
@route_logger(logger)
|
||||
def set_settings():
|
||||
data = request.json
|
||||
config.update_config(data)
|
||||
return jsonify({"message": "Settings updated"})
|
||||
|
||||
|
||||
@app.route("/api/settings", methods=["GET"])
|
||||
@route_logger(logger)
|
||||
def get_settings():
|
||||
configs = config.get_config()
|
||||
return jsonify({"settings": configs})
|
||||
|
||||
|
||||
@app.route("/api/status", methods=["GET"])
|
||||
@route_logger(logger)
|
||||
def status():
|
||||
return jsonify({"status": "server is running!"})
|
||||
|
||||
if __name__ == "__main__":
|
||||
logger.info("Devika is up and running!")
|
||||
socketio.run(app, debug=False, port=1337, host="0.0.0.0")
|
61  docker-compose.yaml  (new file)
@@ -0,0 +1,61 @@
version: "3.9"

services:
  ollama-service:
    image: ollama/ollama:latest
    expose:
      - 11434
    ports:
      - 11434:11434
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:11434/ || exit 1"]
      interval: 5s
      timeout: 30s
      retries: 5
      start_period: 30s
    networks:
      - devika-subnetwork

  devika-backend-engine:
    build:
      context: .
      dockerfile: devika.dockerfile
    depends_on:
      - ollama-service
    expose:
      - 1337
    ports:
      - 1337:1337
    environment:
      - OLLAMA_HOST=http://ollama-service:11434
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:1337/ || exit 1"]
      interval: 5s
      timeout: 30s
      retries: 5
      start_period: 30s
    volumes:
      - devika-backend-dbstore:/home/nonroot/devika/db
    networks:
      - devika-subnetwork

  devika-frontend-app:
    build:
      context: .
      dockerfile: app.dockerfile
      args:
        - VITE_API_BASE_URL=http://127.0.0.1:1337
    depends_on:
      - devika-backend-engine
    expose:
      - 3000
    ports:
      - 3000:3000
    networks:
      - devika-subnetwork

networks:
  devika-subnetwork:

volumes:
  devika-backend-dbstore:
BIN  docs/Installation/images/bing-1.png  (new file; binary not shown, 74 KiB)
BIN  docs/Installation/images/bing.png  (new file; binary not shown, 58 KiB)
BIN  docs/Installation/images/google-2.png  (new file; binary not shown, 101 KiB)
BIN  docs/Installation/images/google.png  (new file; binary not shown, 226 KiB)
20  docs/Installation/ollama.md  (new file)
@@ -0,0 +1,20 @@
# Ollama Installation Guide

This guide will help you set up Ollama for Devika. Ollama is a tool that allows you to run open-source large language models (LLMs) locally on your machine. It supports a variety of models like Llama 2, Mistral, Code Llama, and many more.

## Installation

1. Go to the [Ollama](https://ollama.com) website.
2. Download the latest version of Ollama.
3. After installing Ollama, download the model you want to use. [Models](https://ollama.com/library)
4. Select the model you want to download and copy the command. For example, `ollama run llama2` will download the model and start the server.
5. `ollama list` will show the list of models you have downloaded.
6. If the server isn't running, you can start it manually with `ollama serve`. The default address for the server is `http://localhost:11434`.
7. To change the port and other configurations, follow the FAQ [here](https://github.com/ollama/ollama/blob/main/docs/faq.md).
8. For more information, `ollama [command] --help` will show the help menu. For example, `ollama run --help` will show the help menu for the run command.

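As a quick sanity check that the server is reachable at the default address from step 6, the short snippet below queries Ollama's `/api/tags` endpoint, which lists the locally downloaded models:

```python
# List locally downloaded Ollama models via the default server address.
import requests

models = requests.get("http://localhost:11434/api/tags").json()
print([m["name"] for m in models.get("models", [])])
```
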
## Devika Configuration

- If you serve Ollama on a different address, you can change the port in the `config.toml` file or change it via the UI.
- If you are using the default address, Devika will automatically detect the server and fetch the list of models.
33  docs/Installation/search_engine.md  (new file)
@@ -0,0 +1,33 @@
# Search Engine Setup

To use the search engine capabilities of Devika, you need to set up the search engine API keys. Currently, Devika supports the Bing, Google, and DuckDuckGo search engines. If you want to use DuckDuckGo, you don't need to set up any API keys.

For the Bing and Google search engines, you need to set up the API keys. Here's how you can do it:

## Bing Search API
- Create an Azure account. You can create a free account [here](https://azure.microsoft.com/en-us/free/).
- Go to the [Bing Search API](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api) website.
- Click on the `Try now` button.
- Sign in/sign up with your Azure account.
- Create a new resource group (if you don't have one).
![alt text](images/bing.png)
- Click on the `Review and create` button.
- If everything is fine, click on the `Create` button.
- Once the resource is created, go to the `Keys and Endpoint` tab.
![alt text](images/bing-1.png)
- Copy either `Key1` or `Key2` and paste it into the `API_KEYS` field with the name `BING` in the `config.toml` file located in the root directory of Devika, or set it via the UI.
- Copy the `Endpoint` and paste it into the `API_ENDPOINTS` field with the name `BING` in the `config.toml` file located in the root directory of Devika, or set it via the UI.

## Google Search API
- If you don't have one, create a GCP account in the [Google Cloud Console](https://console.cloud.google.com/).
- The official documentation is available [here](https://developers.google.com/custom-search/v1/overview).
- Click on `Get a Key`.
- Select an existing project or create a new one, then click next.
![alt text](images/google.png)
- This enables the Custom Search API for the project and creates the API key.
- Copy the API key and paste it into the `API_KEYS` field with the name `GOOGLE_SEARCH` in the `config.toml` file in the root directory of Devika, or set it via the UI.
- For the search engine ID, go to the [Google Custom Search Engine](https://programmablesearchengine.google.com/controlpanel/all) website.
- Click on the `Add` button.
![alt text](images/google-2.png)
- After creating the engine, copy the `Search Engine ID` and paste it into the `API_ENDPOINTS` field with the name `GOOGLE_SEARCH_ENGINE_ID` in the `config.toml` file in the root directory of Devika, or set it via the UI.
251
docs/architecture/ARCHITECTURE.md
Normal file
251
docs/architecture/ARCHITECTURE.md
Normal file
|
@ -0,0 +1,251 @@
|
|||
# Devika Architecture
|
||||
|
||||
Devika is an advanced AI software engineer that can understand high-level human instructions, break them down into steps, research relevant information, and write code to achieve a given objective. This document provides a detailed technical overview of Devika's system architecture and how the various components work together.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Overview](#overview)
|
||||
2. [Agent Core](#agent-core)
|
||||
3. [Agents](#agents)
|
||||
- [Planner](#planner)
|
||||
- [Researcher](#researcher)
|
||||
- [Coder](#coder)
|
||||
- [Action](#action)
|
||||
- [Runner](#runner)
|
||||
- [Feature](#feature)
|
||||
- [Patcher](#patcher)
|
||||
- [Reporter](#reporter)
|
||||
- [Decision](#decision)
|
||||
4. [Language Models](#language-models)
|
||||
5. [Browser Interaction](#browser-interaction)
|
||||
6. [Project Management](#project-management)
|
||||
7. [Agent State Management](#agent-state-management)
|
||||
8. [Services](#services)
|
||||
9. [Utilities](#utilities)
|
||||
10. [Conclusion](#conclusion)
|
||||
|
||||
## Overview
|
||||
|
||||
At a high level, Devika consists of the following key components:
|
||||
|
||||
- **Agent Core**: Orchestrates the overall AI planning, reasoning and execution process. Communicates with various sub-agents.
|
||||
- **Agents**: Specialized sub-agents that handle specific tasks like planning, research, coding, patching, reporting etc.
|
||||
- **Language Models**: Leverages large language models (LLMs) like Claude, GPT-4, GPT-3 for natural language understanding and generation.
|
||||
- **Browser Interaction**: Enables web browsing, information gathering, and interaction with web elements.
|
||||
- **Project Management**: Handles organization and persistence of project-related data.
|
||||
- **Agent State Management**: Tracks and persists the dynamic state of the AI agent across interactions.
|
||||
- **Services**: Integrations with external services like GitHub, Netlify for enhanced capabilities.
|
||||
- **Utilities**: Supporting modules for configuration, logging, vector search, PDF generation etc.
|
||||
|
||||
Let's dive into each of these components in more detail.
|
||||
|
||||
## Agent Core
|
||||
|
||||
The `Agent` class serves as the central engine that drives Devika's AI planning and execution loop. Here's how it works:
|
||||
|
||||
1. When a user provides a high-level prompt, the `execute` method is invoked on the Agent.
|
||||
2. The prompt is first passed to the Planner agent to generate a step-by-step plan.
|
||||
3. The Researcher agent then takes this plan and extracts relevant search queries and context.
|
||||
4. The Agent performs web searches using Bing Search API and crawls the top results.
|
||||
5. The raw crawled content is passed through the Formatter agent to extract clean, relevant information.
|
||||
6. This researched context, along with the step-by-step plan, is fed to the Coder agent to generate code.
|
||||
7. The generated code is saved to the project directory on disk.
|
||||
8. If the user interacts further with a follow-up prompt, the `subsequent_execute` method is invoked.
|
||||
9. The Action agent determines the appropriate action to take based on the user's message (run code, deploy, write tests, add feature, fix bug, write report etc.)
|
||||
10. The corresponding specialized agent is invoked to perform the action (Runner, Feature, Patcher, Reporter).
|
||||
11. Results are communicated back to the user and the project files are updated.
|
||||
|
||||
Throughout this process, the Agent Core is responsible for:
|
||||
- Managing conversation history and project-specific context
|
||||
- Updating agent state and internal monologue
|
||||
- Accumulating context keywords across agent prompts
|
||||
- Emulating the "thinking" process of the AI through timed agent state updates
|
||||
- Handling special commands through the Decision agent (e.g. git clone, browser interaction session)
|
||||
|
||||
## Agents
|
||||
|
||||
Devika's cognitive abilities are powered by a collection of specialized sub-agents. Each agent is implemented as a separate Python class. Agents communicate with the underlying LLMs through prompt templates defined in Jinja2 format. Key agents include:
|
||||
|
||||
### Planner
|
||||
- Generates a high-level step-by-step plan based on the user's prompt
|
||||
- Extracts focus area and provides a summary
|
||||
- Uses few-shot prompting to provide examples of the expected response format
|
||||
|
||||
### Researcher
|
||||
- Takes the generated plan and extracts relevant search queries
|
||||
- Ranks and filters queries based on relevance and specificity
|
||||
- Prompts the user for additional context if required
|
||||
- Aims to maximize information gain while minimizing number of searches
|
||||
|
||||
### Coder
|
||||
- Generates code based on the step-by-step plan and researched context
|
||||
- Segments code into appropriate files and directories
|
||||
- Includes informative comments and documentation
|
||||
- Handles a variety of languages and frameworks
|
||||
- Validates code syntax and style
|
||||
|
||||
### Action
|
||||
- Determines the appropriate action to take based on the user's follow-up prompt
|
||||
- Maps user intent to a specific action keyword (run, test, deploy, fix, implement, report)
|
||||
- Provides a human-like confirmation of the action to the user
|
||||
|
||||
### Runner
|
||||
- Executes the written code in a sandboxed environment
|
||||
- Handles different OS environments (Mac, Linux, Windows)
|
||||
- Streams command output to user in real-time
|
||||
- Gracefully handles errors and exceptions
|
||||
|
||||
### Feature
|
||||
- Implements a new feature based on user's specification
|
||||
- Modifies existing project files while maintaining code structure and style
|
||||
- Performs incremental testing to verify feature is working as expected
|
||||
|
||||
### Patcher
|
||||
- Debugs and fixes issues based on user's description or error message
|
||||
- Analyzes existing code to identify potential root causes
|
||||
- Suggests and implements fix, with explanation of the changes made
|
||||
|
||||
### Reporter
|
||||
- Generates a comprehensive report summarizing the project
|
||||
- Includes high-level overview, technical design, setup instructions, API docs etc.
|
||||
- Formats report in a clean, readable structure with table of contents
|
||||
- Exports report as a PDF document
|
||||
|
||||
### Decision
|
||||
- Handles special command-like instructions that don't fit other agents
|
||||
- Maps commands to specific functions (git clone, browser interaction etc.)
|
||||
- Executes the corresponding function with provided arguments
|
||||
|
||||
Each agent follows a common pattern:
|
||||
1. Prepare a prompt by rendering the Jinja2 template with current context
|
||||
2. Query the LLM to get a response based on the prompt
|
||||
3. Validate and parse the LLM's response to extract structured output
|
||||
4. Perform any additional processing or side-effects (e.g. save to disk)
|
||||
5. Return the result to the Agent Core for further action
|
||||
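As a rough illustration, a minimal agent following this pattern might look like the sketch below. The class and inline template are hypothetical, but the `LLM(model_id=...)` constructor and `inference(prompt, project_name)` call mirror the committed agents in `src/agents/`:

```python
from jinja2 import Environment, BaseLoader

from src.llm import LLM

# Hypothetical inline template; the real agents load a .jinja2 file from disk.
PROMPT = "Summarize the following plan:\n{{ plan }}"


class ExampleAgent:
    def __init__(self, base_model: str):
        self.llm = LLM(model_id=base_model)

    def render(self, plan: str) -> str:
        # 1. Render the Jinja2 template with the current context
        template = Environment(loader=BaseLoader()).from_string(PROMPT)
        return template.render(plan=plan)

    def validate_response(self, response: str):
        # 3. Validate/parse the raw LLM output into structured output
        response = response.strip()
        return response if response else False

    def execute(self, plan: str, project_name: str):
        # 2. Query the LLM with the rendered prompt
        response = self.llm.inference(self.render(plan), project_name)
        # 4./5. Parse the result and hand it back to the Agent Core
        return self.validate_response(response)
```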
|
||||
Agents aim to be stateless and idempotent where possible. State and history are managed by the Agent Core and passed into the agents as needed. This allows for a modular, composable design.
|
||||
|
||||
## Language Models
|
||||
|
||||
Devika's natural language processing capabilities are driven by state-of-the-art LLMs. The `LLM` class provides a unified interface to interact with different language models:
|
||||
|
||||
- **Claude** (Anthropic): Claude models such as claude-v1.3 and claude-instant-v1.0
|
||||
- **GPT-4/GPT-3.5** (OpenAI): models such as gpt-4 and gpt-3.5-turbo
|
||||
- **Self-hosted models** (via [Ollama](https://ollama.com/)): Allows using open-source models in a self-hosted environment
|
||||
|
||||
The `LLM` class abstracts out the specifics of each provider's API, allowing agents to interact with the models in a consistent way. It supports:
|
||||
- Listing available models
|
||||
- Generating completions based on a prompt
|
||||
- Tracking and accumulating token usage over time
|
||||
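A minimal sketch of what such a unified wrapper can look like; the dispatch-by-prefix scheme and the stubbed provider calls are illustrative assumptions, not the exact implementation:

```python
class UnifiedLLM:
    """Illustrative sketch of a provider-agnostic LLM wrapper (not Devika's exact class)."""

    def __init__(self, model_id: str):
        self.model_id = model_id
        self.tokens_used = 0  # accumulated usage across calls

    def inference(self, prompt: str) -> str:
        # Dispatch to a provider-specific client based on the model id (assumed scheme).
        if self.model_id.startswith("claude"):
            text = self._call_anthropic(prompt)
        elif self.model_id.startswith("gpt"):
            text = self._call_openai(prompt)
        else:
            text = self._call_ollama(prompt)
        self.tokens_used += len(prompt.split())  # crude stand-in for real token counting
        return text

    # Stubs standing in for the vendor SDK calls the real class wraps.
    def _call_anthropic(self, prompt: str) -> str:
        raise NotImplementedError

    def _call_openai(self, prompt: str) -> str:
        raise NotImplementedError

    def _call_ollama(self, prompt: str) -> str:
        raise NotImplementedError
```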
|
||||
Choosing the right model for a given use case depends on factors such as desired quality, speed, and cost. The modular design allows swapping out models easily.
|
||||
|
||||
## Browser Interaction
|
||||
|
||||
Devika can interact with webpages in an automated fashion to gather information and perform actions. This is powered by the `Browser` and `Crawler` classes.
|
||||
|
||||
The `Browser` class uses Playwright to provide high-level web automation primitives:
|
||||
- Spawning a browser instance (Chromium)
|
||||
- Navigating to a URL
|
||||
- Querying DOM elements
|
||||
- Extracting page content as text, Markdown, PDF etc.
|
||||
- Taking a screenshot of the page
|
||||
|
||||
The `Crawler` class defines an agent that can interact with a webpage based on natural language instructions. It leverages:
|
||||
- Pre-defined browser actions like scroll, click, type etc.
|
||||
- A prompt template that provides examples of how to use these actions
|
||||
- LLM to determine the best action to take based on current page content and objective
|
||||
|
||||
The `start_interaction` function sets up a loop where:
|
||||
1. The current page content and objective are passed to the LLM
|
||||
2. The LLM returns the next best action to take (e.g. "CLICK 12" or "TYPE 7 machine learning")
|
||||
3. The Crawler executes this action on the live page
|
||||
4. The process repeats from the updated page state
|
||||
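Roughly, the loop can be sketched as follows; here `llm` stands for any prompt-to-text callable, and the `crawler` methods and `DONE` sentinel are assumptions for illustration:

```python
def interaction_loop(crawler, llm, objective: str, max_steps: int = 10):
    """Sketch of the browse-act loop; not the exact start_interaction code."""
    for _ in range(max_steps):
        page_content = crawler.extract_page_content()
        # 1. Current page content and objective go to the LLM
        action = llm(f"Objective: {objective}\nPage:\n{page_content}\nNext action?")
        # 2./3. Execute the returned action (e.g. "CLICK 12", "TYPE 7 machine learning")
        if action.startswith("CLICK"):
            _, element_id = action.split()
            crawler.click(int(element_id))
        elif action.startswith("TYPE"):
            _, element_id, *words = action.split()
            crawler.type(int(element_id), " ".join(words))
        elif action.startswith("DONE"):  # assumed stop sentinel
            break
        # 4. The loop repeats from the updated page state
```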
|
||||
This allows performing a sequence of actions to achieve a higher-level objective (e.g. researching a topic, filling out a form, or interacting with an app).
|
||||
|
||||
## Project Management
|
||||
|
||||
The `ProjectManager` class is responsible for creating, updating and querying projects and their associated metadata. Key functions include:
|
||||
|
||||
- Creating a new project and initializing its directory structure
|
||||
- Deleting a project and its associated files
|
||||
- Adding a message to a project's conversation history
|
||||
- Retrieving messages for a given project
|
||||
- Getting the latest user/AI message in a conversation
|
||||
- Listing all projects
|
||||
- Zipping a project's files for export
|
||||
|
||||
Project metadata is persisted in a SQLite database using SQLModel. The `Projects` table stores:
|
||||
- Project name
|
||||
- JSON-serialized conversation history
|
||||
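A minimal SQLModel sketch of that table; the exact field names beyond the two listed above are assumptions:

```python
from typing import Optional

from sqlmodel import Field, SQLModel


class Projects(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    project: str              # project name
    message_stack_json: str   # JSON-serialized conversation history
```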
|
||||
This allows the agent to work on multiple projects simultaneously and retain conversation history across sessions.
|
||||
|
||||
## Agent State Management
|
||||
|
||||
As the AI agent works on a task, we need to track and display its internal state to the user. The `AgentState` class handles this by providing an interface to:
|
||||
|
||||
- Initialize a new agent state
|
||||
- Add a state to the current sequence of states for a project
|
||||
- Update the latest state for a project
|
||||
- Query the latest state or entire state history for a project
|
||||
- Mark the agent as active/inactive or task as completed
|
||||
|
||||
Agent state includes information like:
|
||||
- Current step or action being executed
|
||||
- Internal monologue reflecting the agent's current "thoughts"
|
||||
- Browser interactions (URL visited, screenshot)
|
||||
- Terminal interactions (command executed, output)
|
||||
- Token usage so far
|
||||
|
||||
Like projects, agent states are also persisted in the SQLite DB using SQLModel. The `AgentStateModel` table stores:
|
||||
- Project name
|
||||
- JSON-serialized list of states
|
||||
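Sketched the same way as the `Projects` table above (field names again assumed):

```python
from typing import Optional

from sqlmodel import Field, SQLModel


class AgentStateModel(SQLModel, table=True):
    id: Optional[int] = Field(default=None, primary_key=True)
    project: str            # project name
    state_stack_json: str   # JSON-serialized list of states
```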
|
||||
Having a persistent log of agent states is useful for:
|
||||
- Providing real-time visibility to the user
|
||||
- Auditing and debugging agent behavior
|
||||
- Resuming from interruptions or failures
|
||||
|
||||
## Services
|
||||
|
||||
Devika integrates with external services to augment its capabilities:
|
||||
|
||||
- **GitHub**: Performing git operations like clone/pull, listing repos/commits/files etc.
|
||||
- **Netlify**: Deploying web apps and sites seamlessly
|
||||
|
||||
The `GitHub` and `Netlify` classes provide lightweight wrappers around the respective service APIs.
|
||||
They handle authentication, HTTP requests, and response parsing.
|
||||
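For a sense of what these wrappers look like, here is a minimal Netlify-style sketch built on `requests`; the endpoint and payload are assumptions, not a documented API contract:

```python
import requests


class NetlifyWrapper:
    """Illustrative thin service wrapper, not the committed Netlify class."""

    BASE_URL = "https://api.netlify.com/api/v1"  # assumed endpoint

    def __init__(self, api_token: str):
        # Authentication is handled once, up front
        self.headers = {"Authorization": f"Bearer {api_token}"}

    def create_site(self, name: str) -> dict:
        # Make the HTTP request and parse the JSON response
        resp = requests.post(
            f"{self.BASE_URL}/sites", headers=self.headers, json={"name": name}
        )
        resp.raise_for_status()
        return resp.json()
```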
|
||||
This allows Devika to perform actions like:
|
||||
- Cloning a repo given a GitHub URL
|
||||
- Listing a user's GitHub repos
|
||||
- Creating a new Netlify site
|
||||
- Deploying a directory to Netlify
|
||||
- Providing the deployed site URL to the user
|
||||
|
||||
Integrations are modular, so new services can be added easily.
|
||||
|
||||
## Utilities
|
||||
|
||||
Devika makes use of several utility modules to support its functioning:
|
||||
|
||||
- `Config`: Loads and provides access to configuration settings (API keys, folder paths etc.)
|
||||
- `Logger`: Sets up logging to console and file, with support for log levels and colors
|
||||
- `ReadCode`: Recursively reads code files in a directory and converts them into a Markdown format
|
||||
- `SentenceBERT`: Extracts keywords and semantic information from text using SentenceBERT embeddings
|
||||
- `Experts`: A collection of domain-specific knowledge bases to assist in certain areas (e.g. webdev, physics, chemistry, math)
|
||||
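For a sense of how small these helpers are, a `ReadCode`-style traversal can be sketched in a few lines (a simplification, not the committed module):

```python
import os


def code_set_to_markdown(project_dir: str) -> str:
    """Recursively read code files and render them as one Markdown document."""
    fence = "`" * 3  # a literal ``` without breaking this example's own fencing
    sections = []
    for root, _, files in os.walk(project_dir):
        for name in sorted(files):
            path = os.path.join(root, name)
            with open(path, "r", encoding="utf-8") as f:
                code = f.read()
            rel_path = os.path.relpath(path, project_dir)
            sections.append(f"File: `{rel_path}`:\n{fence}\n{code}\n{fence}")
    return "\n\n".join(sections)
```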
|
||||
The utility modules provide reusable functionality shared across different parts of the system.
|
||||
|
||||
## Conclusion
|
||||
|
||||
Devika is a complex system that combines multiple AI and automation techniques to deliver an intelligent programming assistant. Key design principles include:
|
||||
|
||||
- Modularity: Breaking down functionality into specialized agents and services
|
||||
- Flexibility: Supporting different LLMs, services and domains in a pluggable fashion
|
||||
- Persistence: Storing project and agent state in a DB to enable pause/resume and auditing
|
||||
- Transparency: Surfacing agent thought process and interactions to user in real-time
|
||||
|
||||
By understanding how the different components work together, we can extend, optimize and scale Devika to take on increasingly sophisticated software engineering tasks. The agent-based architecture provides a strong foundation to build more advanced AI capabilities in the future.
|
16
docs/architecture/README.md
Normal file
16
docs/architecture/README.md
Normal file
|
@ -0,0 +1,16 @@
|
|||
## System Architecture
|
||||
|
||||
Devika's system architecture consists of the following key components:
|
||||
|
||||
1. **User Interface**: A web-based chat interface for interacting with Devika, viewing project files, and monitoring the agent's state.
|
||||
2. **Agent Core**: The central component that orchestrates the AI planning, reasoning, and execution process. It communicates with various sub-agents and modules to accomplish tasks.
|
||||
3. **Large Language Models**: Devika leverages state-of-the-art language models like **Claude**, **GPT-4**, and **Local LLMs via Ollama** for natural language understanding, generation, and reasoning.
|
||||
4. **Planning and Reasoning Engine**: Responsible for breaking down high-level objectives into actionable steps and making decisions based on the current context.
|
||||
5. **Research Module**: Utilizes keyword extraction and web browsing capabilities to gather relevant information for the task at hand.
|
||||
6. **Code Writing Module**: Generates code based on the plan, research findings, and user requirements. Supports multiple programming languages.
|
||||
7. **Browser Interaction Module**: Enables Devika to navigate websites, extract information, and interact with web elements as needed.
|
||||
8. **Knowledge Base**: Stores and retrieves project-specific information, code snippets, and learned knowledge for efficient access.
|
||||
9. **Database**: Persists project data, agent states, and configuration settings.
|
||||
|
||||
Read [ARCHITECTURE.md](https://github.com/stitionai/devika/blob/main/docs/architecture/ARCHITECTURE.md) for the detailed architecture of Devika.
|
||||
Read [UNDER_THE_HOOD.md](https://github.com/stitionai/devika/blob/main/docs/architecture/UNDER_THE_HOOD.md) for the detailed working of Devika.
|
50
docs/architecture/UNDER_THE_HOOD.md
Normal file
50
docs/architecture/UNDER_THE_HOOD.md
Normal file
|
@ -0,0 +1,50 @@
|
|||
## Under The Hood
|
||||
|
||||
Let's dive deeper into some of the key components and techniques used in Devika:
|
||||
|
||||
### AI Planning and Reasoning
|
||||
|
||||
Devika employs advanced AI planning and reasoning algorithms to break down high-level objectives into actionable steps. The planning process involves the following stages:
|
||||
|
||||
1. **Objective Understanding**: Devika analyzes the given objective or task description to understand the user's intent and requirements.
|
||||
2. **Context Gathering**: Relevant context is collected from the conversation history, project files, and knowledge base to inform the planning process.
|
||||
3. **Step Generation**: Based on the objective and context, Devika generates a sequence of high-level steps to accomplish the task.
|
||||
4. **Refinement and Validation**: The generated steps are refined and validated to ensure their feasibility and alignment with the objective.
|
||||
5. **Execution**: Devika executes each step in the plan, utilizing various sub-agents and modules as needed.
|
||||
|
||||
The reasoning engine constantly evaluates the progress and makes adjustments to the plan based on new information or feedback received during execution.
|
||||
|
||||
### Keyword Extraction
|
||||
|
||||
To enable focused research and information gathering, Devika employs keyword extraction techniques. The process involves the following steps:
|
||||
|
||||
1. **Preprocessing**: The input text (objective, conversation history, or project files) is preprocessed by removing stop words, tokenizing, and normalizing the text.
|
||||
2. **Keyword Identification**: Devika uses the BERT (Bidirectional Encoder Representations from Transformers) model to identify important keywords and phrases from the preprocessed text. BERT's pre-training on a large corpus allows it to capture semantic relationships and understand the significance of words in the given context.
|
||||
3. **Keyword Ranking**: The identified keywords are ranked based on their relevance and importance to the task at hand. Techniques like TF-IDF (Term Frequency-Inverse Document Frequency) and TextRank are used to assign scores to each keyword.
|
||||
4. **Keyword Selection**: The top-ranked keywords are selected as the most relevant and informative for the current context. These keywords are used to guide the research and information gathering process.
|
||||
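Since `keybert` appears in the project's requirements, the extraction step can be illustrated with it; the sample text and parameters below are arbitrary:

```python
from keybert import KeyBERT

kw_model = KeyBERT()  # loads a default sentence-transformers model under the hood

text = "Build a web scraper in Python that stores extracted articles in SQLite."
# Returns (keyphrase, relevance score) pairs, highest-scoring first
keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), top_n=5)
for phrase, score in keywords:
    print(f"{phrase}: {score:.2f}")
```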
|
||||
By extracting contextually relevant keywords, Devika can focus its research efforts and retrieve pertinent information to assist in the task completion.
|
||||
|
||||
### Browser Interaction
|
||||
|
||||
Devika incorporates browser interaction capabilities to navigate websites, extract information, and interact with web elements. The browser interaction module leverages the Playwright library to automate web interactions. The process involves the following steps:
|
||||
|
||||
1. **Navigation**: Devika uses Playwright to navigate to specific URLs or perform searches based on the keywords or requirements provided.
|
||||
2. **Element Interaction**: Playwright allows Devika to interact with web elements such as clicking buttons, filling forms, and extracting text from specific elements.
|
||||
3. **Page Parsing**: Devika parses the HTML structure of the web pages visited to extract relevant information. It uses techniques like CSS selectors and XPath to locate and extract specific data points.
|
||||
4. **JavaScript Execution**: Playwright enables Devika to execute JavaScript code within the browser context, allowing for dynamic interactions and data retrieval.
|
||||
5. **Screenshot Capture**: Devika can capture screenshots of the web pages visited, which can be useful for visual reference or debugging purposes.
|
||||
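A condensed example of these steps using Playwright's sync API; the URL and selectors are placeholders:

```python
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page()
    page.goto("https://example.com")          # 1. navigation
    title = page.text_content("h1")           # 2./3. element interaction & page parsing
    # 4. JavaScript execution in the page context
    link_count = page.evaluate("document.querySelectorAll('a').length")
    page.screenshot(path="page.png")          # 5. screenshot capture
    browser.close()
    print(title, link_count)
```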
|
||||
The browser interaction module empowers Devika to gather information from the web, interact with online resources, and incorporate real-time data into its decision-making and code generation processes.
|
||||
|
||||
### Code Writing
|
||||
|
||||
Devika's code writing module generates code based on the plan, research findings, and user requirements. The process involves the following steps:
|
||||
|
||||
1. **Language Selection**: Devika identifies the programming language specified by the user or infers it based on the project context.
|
||||
2. **Code Structure Generation**: Based on the plan and language-specific patterns, Devika generates the high-level structure of the code, including classes, functions, and modules.
|
||||
3. **Code Population**: Devika fills in the code structure with specific logic, algorithms, and data manipulation statements. It leverages the research findings, code snippets from the knowledge base, and its own understanding of programming concepts to generate meaningful code.
|
||||
4. **Code Formatting**: The generated code is formatted according to the language-specific conventions and best practices to ensure readability and maintainability.
|
||||
5. **Code Review and Refinement**: Devika reviews the generated code for syntax errors, logical inconsistencies, and potential improvements. It iteratively refines the code based on its own analysis and any feedback provided by the user.
|
||||
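In the committed `Coder` agent (`src/agents/coder/coder.py`, later in this commit), the generated code arrives as `File:`-tagged Markdown blocks that are then split into files on disk; a stripped-down version of that parsing step:

```python
def parse_file_blocks(markdown: str) -> dict:
    """Split a response of 'File: `path`:' blocks into {path: code} (simplified)."""
    files, current_file, current_code = {}, None, []
    for line in markdown.splitlines():
        if line.startswith("File: "):
            if current_file:
                files[current_file] = "\n".join(current_code)
            current_file = line.split("`")[1]  # path sits between the backticks
            current_code = []
        elif line.startswith("`" * 3):
            continue  # skip the code fences themselves
        else:
            current_code.append(line)
    if current_file:
        files[current_file] = "\n".join(current_code)
    return files
```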
|
||||
Devika's code writing capabilities enable it to generate functional and efficient code in various programming languages, taking into account the specific requirements and context of each project.
|
32
requirements.txt
Normal file
32
requirements.txt
Normal file
|
@ -0,0 +1,32 @@
|
|||
flask
|
||||
flask-cors
|
||||
toml
|
||||
urllib3
|
||||
requests
|
||||
colorama
|
||||
fastlogging
|
||||
Jinja2
|
||||
mistletoe
|
||||
markdownify
|
||||
pdfminer.six
|
||||
playwright
|
||||
pytest-playwright
|
||||
tiktoken
|
||||
ollama
|
||||
openai
|
||||
anthropic
|
||||
google-generativeai
|
||||
sqlmodel
|
||||
keybert
|
||||
GitPython
|
||||
netlify-py
|
||||
Markdown
|
||||
xhtml2pdf
|
||||
mistralai
|
||||
Flask-SocketIO
|
||||
eventlet
|
||||
groq
|
||||
duckduckgo-search
|
||||
orjson
|
||||
gevent
|
||||
gevent-websocket
|
31
sample.config.toml
Normal file
31
sample.config.toml
Normal file
|
@ -0,0 +1,31 @@
|
|||
[STORAGE]
|
||||
SQLITE_DB = "data/db/devika.db"
|
||||
SCREENSHOTS_DIR = "data/screenshots"
|
||||
PDFS_DIR = "data/pdfs"
|
||||
PROJECTS_DIR = "data/projects"
|
||||
LOGS_DIR = "data/logs"
|
||||
REPOS_DIR = "data/repos"
|
||||
|
||||
[API_KEYS]
|
||||
BING = "<YOUR_BING_API_KEY>"
|
||||
GOOGLE_SEARCH = "<YOUR_GOOGLE_SEARCH_API_KEY>"
|
||||
GOOGLE_SEARCH_ENGINE_ID = "<YOUR_GOOGLE_SEARCH_ENGINE_ID>"
|
||||
CLAUDE = "<YOUR_CLAUDE_API_KEY>"
|
||||
OPENAI = "<YOUR_OPENAI_API_KEY>"
|
||||
GEMINI = "<YOUR_GEMINI_API_KEY>"
|
||||
MISTRAL = "<YOUR_MISTRAL_API_KEY>"
|
||||
GROQ = "<YOUR_GROQ_API_KEY>"
|
||||
NETLIFY = "<YOUR_NETLIFY_API_KEY>"
|
||||
|
||||
[API_ENDPOINTS]
|
||||
BING = "https://api.bing.microsoft.com/v7.0/search"
|
||||
GOOGLE = "https://www.googleapis.com/customsearch/v1"
|
||||
OLLAMA = "http://127.0.0.1:11434"
|
||||
OPENAI = "https://api.openai.com/v1"
|
||||
|
||||
[LOGGING]
|
||||
LOG_REST_API = "true"
|
||||
LOG_PROMPTS = "false"
|
||||
|
||||
[TIMEOUT]
|
||||
INFERENCE = 60
|
7
setup.sh
Normal file
7
setup.sh
Normal file
|
@ -0,0 +1,7 @@
|
|||
#!/bin/bash
|
||||
|
||||
pip3 install -r requirements.txt
|
||||
playwright install
|
||||
python3 -m playwright install-deps
|
||||
cd ui/
|
||||
bun install
|
9
src/agents/__init__.py
Normal file
9
src/agents/__init__.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
from .agent import Agent
|
||||
|
||||
from .planner import Planner
|
||||
from .internal_monologue import InternalMonologue
|
||||
from .researcher import Researcher
|
||||
from .formatter import Formatter
|
||||
from .coder import Coder
|
||||
from .action import Action
|
||||
from .runner import Runner
|
1
src/agents/action/__init__.py
Normal file
1
src/agents/action/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
from .action import Action
|
41
src/agents/action/action.py
Normal file
41
src/agents/action/action.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
import json
|
||||
|
||||
from jinja2 import Environment, BaseLoader
|
||||
|
||||
from src.services.utils import retry_wrapper, validate_responses
|
||||
from src.config import Config
|
||||
from src.llm import LLM
|
||||
|
||||
PROMPT = open("src/agents/action/prompt.jinja2", "r").read().strip()
|
||||
|
||||
class Action:
|
||||
def __init__(self, base_model: str):
|
||||
config = Config()
|
||||
self.project_dir = config.get_projects_dir()
|
||||
|
||||
self.llm = LLM(model_id=base_model)
|
||||
|
||||
def render(
|
||||
self, conversation: str
|
||||
) -> str:
|
||||
env = Environment(loader=BaseLoader())
|
||||
template = env.from_string(PROMPT)
|
||||
return template.render(
|
||||
conversation=conversation
|
||||
)
|
||||
|
||||
@validate_responses
|
||||
def validate_response(self, response: str):
|
||||
if "response" not in response and "action" not in response:
|
||||
return False
|
||||
else:
|
||||
return response["response"], response["action"]
|
||||
|
||||
@retry_wrapper
|
||||
def execute(self, conversation: list, project_name: str) -> str:
|
||||
prompt = self.render(conversation)
|
||||
response = self.llm.inference(prompt, project_name)
|
||||
|
||||
valid_response = self.validate_response(response)
|
||||
|
||||
return valid_response
|
31
src/agents/action/prompt.jinja2
Normal file
31
src/agents/action/prompt.jinja2
Normal file
|
@ -0,0 +1,31 @@
|
|||
You are Devika, an AI Software Engineer. You have been talking to the user and these are your exchanges so far:
|
||||
|
||||
```
|
||||
{% for message in conversation %}
|
||||
{{ message }}
|
||||
{% endfor %}
|
||||
```
|
||||
|
||||
User's last message: {{ conversation[-1] }}
|
||||
|
||||
You are now going to respond to the user's last message according to the specific request.
|
||||
|
||||
The user could be asking the following:
|
||||
- `answer` - Answer a question about the project.
|
||||
- `run` - Run the project.
|
||||
- `deploy` - Deploy the project.
|
||||
- `feature` - Add a new feature to the project.
|
||||
- `bug` - Fix a bug in the project.
|
||||
- `report` - Generate a report on the project.
|
||||
|
||||
Your response should be in the following format:
|
||||
```
|
||||
{
|
||||
"response": "Your human-like response to the user's message here describing the action you are taking."
|
||||
"action": "run"
|
||||
}
|
||||
```
|
||||
|
||||
The action can only be one; read the user's last message carefully to determine which action to take. Sometimes the user's prompt might indicate multiple actions, but you should take only the single most appropriate action and use your response to convey what you are doing.
|
||||
|
||||
Any response other than the JSON format will be rejected by the system.
|
365
src/agents/agent.py
Normal file
365
src/agents/agent.py
Normal file
|
@ -0,0 +1,365 @@
|
|||
from .planner import Planner
|
||||
from .researcher import Researcher
|
||||
from .formatter import Formatter
|
||||
from .coder import Coder
|
||||
from .action import Action
|
||||
from .internal_monologue import InternalMonologue
|
||||
from .answer import Answer
|
||||
from .runner import Runner
|
||||
from .feature import Feature
|
||||
from .patcher import Patcher
|
||||
from .reporter import Reporter
|
||||
from .decision import Decision
|
||||
|
||||
from src.project import ProjectManager
|
||||
from src.state import AgentState
|
||||
from src.logger import Logger
|
||||
|
||||
from src.bert.sentence import SentenceBert
|
||||
from src.memory import KnowledgeBase
|
||||
from src.browser.search import BingSearch, GoogleSearch, DuckDuckGoSearch
|
||||
from src.browser import Browser
|
||||
from src.browser import start_interaction
|
||||
from src.filesystem import ReadCode
|
||||
from src.services import Netlify
|
||||
from src.documenter.pdf import PDF
|
||||
|
||||
import json
|
||||
import time
|
||||
import platform
|
||||
import tiktoken
|
||||
import asyncio
|
||||
|
||||
from src.socket_instance import emit_agent
|
||||
|
||||
|
||||
class Agent:
|
||||
def __init__(self, base_model: str, search_engine: str, browser: Browser = None):
|
||||
if not base_model:
|
||||
raise ValueError("base_model is required")
|
||||
|
||||
self.logger = Logger()
|
||||
|
||||
"""
|
||||
Accumulate contextual keywords from chained prompts of all preparation agents
|
||||
"""
|
||||
self.collected_context_keywords = []
|
||||
|
||||
"""
|
||||
Agents
|
||||
"""
|
||||
self.planner = Planner(base_model=base_model)
|
||||
self.researcher = Researcher(base_model=base_model)
|
||||
self.formatter = Formatter(base_model=base_model)
|
||||
self.coder = Coder(base_model=base_model)
|
||||
self.action = Action(base_model=base_model)
|
||||
self.internal_monologue = InternalMonologue(base_model=base_model)
|
||||
self.answer = Answer(base_model=base_model)
|
||||
self.runner = Runner(base_model=base_model)
|
||||
self.feature = Feature(base_model=base_model)
|
||||
self.patcher = Patcher(base_model=base_model)
|
||||
self.reporter = Reporter(base_model=base_model)
|
||||
self.decision = Decision(base_model=base_model)
|
||||
|
||||
self.project_manager = ProjectManager()
|
||||
self.agent_state = AgentState()
|
||||
self.engine = search_engine
self.base_model = base_model  # stored for later use (e.g. start_interaction in make_decision)
|
||||
self.tokenizer = tiktoken.get_encoding("cl100k_base")
|
||||
|
||||
async def open_page(self, project_name, url):
|
||||
browser = await Browser().start()
|
||||
|
||||
await browser.go_to(url)
|
||||
_, raw = await browser.screenshot(project_name)
|
||||
data = await browser.extract_text()
|
||||
await browser.close()
|
||||
|
||||
return browser, raw, data
|
||||
|
||||
def search_queries(self, queries: list, project_name: str) -> dict:
|
||||
results = {}
|
||||
|
||||
knowledge_base = KnowledgeBase()
|
||||
|
||||
if self.engine == "bing":
|
||||
web_search = BingSearch()
|
||||
elif self.engine == "google":
|
||||
web_search = GoogleSearch()
|
||||
else:
|
||||
web_search = DuckDuckGoSearch()
|
||||
|
||||
self.logger.info(f"\nSearch Engine :: {self.engine}")
|
||||
|
||||
for query in queries:
|
||||
query = query.strip().lower()
|
||||
|
||||
# knowledge = knowledge_base.get_knowledge(tag=query)
|
||||
# if knowledge:
|
||||
# results[query] = knowledge
|
||||
# continue
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
web_search.search(query)
|
||||
|
||||
link = web_search.get_first_link()
|
||||
print("\nLink :: ", link, '\n')
|
||||
if not link:
|
||||
continue
|
||||
browser, raw, data = loop.run_until_complete(self.open_page(project_name, link))
|
||||
emit_agent("screenshot", {"data": raw, "project_name": project_name}, False)
|
||||
results[query] = self.formatter.execute(data, project_name)
|
||||
|
||||
self.logger.info(f"got the search results for : {query}")
|
||||
# knowledge_base.add_knowledge(tag=query, contents=results[query])
|
||||
return results
|
||||
|
||||
def update_contextual_keywords(self, sentence: str):
|
||||
"""
|
||||
Update the context keywords with the latest sentence/prompt
|
||||
"""
|
||||
keywords = SentenceBert(sentence).extract_keywords()
|
||||
for keyword in keywords:
|
||||
self.collected_context_keywords.append(keyword[0])
|
||||
|
||||
return self.collected_context_keywords
|
||||
|
||||
def make_decision(self, prompt: str, project_name: str) -> str:
|
||||
decision = self.decision.execute(prompt, project_name)
|
||||
|
||||
for item in decision:
|
||||
function = item["function"]
|
||||
args = item["args"]
|
||||
reply = item["reply"]
|
||||
|
||||
self.project_manager.add_message_from_devika(project_name, reply)
|
||||
|
||||
if function == "git_clone":
|
||||
url = args["url"]
|
||||
# Implement git clone functionality here
|
||||
|
||||
elif function == "generate_pdf_document":
|
||||
user_prompt = args["user_prompt"]
|
||||
# Call the reporter agent to generate the PDF document
|
||||
markdown = self.reporter.execute([user_prompt], "", project_name)
|
||||
_out_pdf_file = PDF().markdown_to_pdf(markdown, project_name)
|
||||
|
||||
project_name_space_url = project_name.replace(" ", "%20")
|
||||
pdf_download_url = "http://127.0.0.1:1337/api/download-project-pdf?project_name={}".format(
|
||||
project_name_space_url)
|
||||
response = f"I have generated the PDF document. You can download it from here: {pdf_download_url}"
|
||||
|
||||
#asyncio.run(self.open_page(project_name, pdf_download_url))
|
||||
|
||||
self.project_manager.add_message_from_devika(project_name, response)
|
||||
|
||||
elif function == "browser_interaction":
|
||||
user_prompt = args["user_prompt"]
|
||||
# Call the interaction agent to interact with the browser
|
||||
start_interaction(self.base_model, user_prompt, project_name)
|
||||
|
||||
elif function == "coding_project":
|
||||
user_prompt = args["user_prompt"]
|
||||
# Call the planner, researcher, coder agents in sequence
|
||||
plan = self.planner.execute(user_prompt, project_name)
|
||||
planner_response = self.planner.parse_response(plan)
|
||||
|
||||
research = self.researcher.execute(plan, self.collected_context_keywords, project_name)
|
||||
search_results = self.search_queries(research["queries"], project_name)
|
||||
|
||||
code = self.coder.execute(
|
||||
step_by_step_plan=plan,
|
||||
user_context=research["ask_user"],
|
||||
search_results=search_results,
|
||||
project_name=project_name
|
||||
)
|
||||
self.coder.save_code_to_project(code, project_name)
|
||||
|
||||
def subsequent_execute(self, prompt: str, project_name: str):
|
||||
"""
|
||||
Subsequent flow of execution
|
||||
"""
|
||||
new_message = self.project_manager.new_message()
|
||||
new_message['message'] = prompt
|
||||
new_message['from_devika'] = False
|
||||
self.project_manager.add_message_from_user(project_name, new_message['message'])
|
||||
|
||||
os_system = platform.platform()
|
||||
|
||||
self.agent_state.set_agent_active(project_name, True)
|
||||
|
||||
conversation = self.project_manager.get_all_messages_formatted(project_name)
|
||||
code_markdown = ReadCode(project_name).code_set_to_markdown()
|
||||
|
||||
response, action = self.action.execute(conversation, project_name)
|
||||
|
||||
self.project_manager.add_message_from_devika(project_name, response)
|
||||
|
||||
print("\naction :: ", action, '\n')
|
||||
|
||||
if action == "answer":
|
||||
response = self.answer.execute(
|
||||
conversation=conversation,
|
||||
code_markdown=code_markdown,
|
||||
project_name=project_name
|
||||
)
|
||||
self.project_manager.add_message_from_devika(project_name, response)
|
||||
|
||||
elif action == "run":
|
||||
project_path = self.project_manager.get_project_path(project_name)
|
||||
self.runner.execute(
|
||||
conversation=conversation,
|
||||
code_markdown=code_markdown,
|
||||
os_system=os_system,
|
||||
project_path=project_path,
|
||||
project_name=project_name
|
||||
)
|
||||
|
||||
elif action == "deploy":
|
||||
deploy_metadata = Netlify().deploy(project_name)
|
||||
deploy_url = deploy_metadata["deploy_url"]
|
||||
|
||||
response = {
|
||||
"message": "Done! I deployed your project on Netlify.",
|
||||
"deploy_url": deploy_url
|
||||
}
|
||||
response = json.dumps(response, indent=4)
|
||||
|
||||
self.project_manager.add_message_from_devika(project_name, response)
|
||||
|
||||
elif action == "feature":
|
||||
code = self.feature.execute(
|
||||
conversation=conversation,
|
||||
code_markdown=code_markdown,
|
||||
system_os=os_system,
|
||||
project_name=project_name
|
||||
)
|
||||
print("\nfeature code :: ", code, '\n')
|
||||
self.feature.save_code_to_project(code, project_name)
|
||||
|
||||
elif action == "bug":
|
||||
code = self.patcher.execute(
|
||||
conversation=conversation,
|
||||
code_markdown=code_markdown,
|
||||
commands=None,
|
||||
error=prompt,
|
||||
system_os=os_system,
|
||||
project_name=project_name
|
||||
)
|
||||
print("\nbug code :: ", code, '\n')
|
||||
self.patcher.save_code_to_project(code, project_name)
|
||||
|
||||
elif action == "report":
|
||||
markdown = self.reporter.execute(conversation, code_markdown, project_name)
|
||||
|
||||
_out_pdf_file = PDF().markdown_to_pdf(markdown, project_name)
|
||||
|
||||
project_name_space_url = project_name.replace(" ", "%20")
|
||||
pdf_download_url = "http://127.0.0.1:1337/api/download-project-pdf?project_name={}".format(
|
||||
project_name_space_url)
|
||||
response = f"I have generated the PDF document. You can download it from here: {pdf_download_url}"
|
||||
|
||||
#asyncio.run(self.open_page(project_name, pdf_download_url))
|
||||
|
||||
self.project_manager.add_message_from_devika(project_name, response)
|
||||
|
||||
self.agent_state.set_agent_active(project_name, False)
|
||||
self.agent_state.set_agent_completed(project_name, True)
|
||||
|
||||
def execute(self, prompt: str, project_name: str) -> str:
|
||||
"""
|
||||
Agentic flow of execution
|
||||
"""
|
||||
if project_name:
|
||||
self.project_manager.add_message_from_user(project_name, prompt)
|
||||
|
||||
self.agent_state.create_state(project=project_name)
|
||||
|
||||
plan = self.planner.execute(prompt, project_name)
|
||||
print("\nplan :: ", plan, '\n')
|
||||
|
||||
planner_response = self.planner.parse_response(plan)
|
||||
reply = planner_response["reply"]
|
||||
focus = planner_response["focus"]
|
||||
plans = planner_response["plans"]
|
||||
summary = planner_response["summary"]
|
||||
|
||||
self.project_manager.add_message_from_devika(project_name, reply)
|
||||
self.project_manager.add_message_from_devika(project_name, json.dumps(plans, indent=4))
|
||||
# self.project_manager.add_message_from_devika(project_name, f"In summary: {summary}")
|
||||
|
||||
self.update_contextual_keywords(focus)
|
||||
print("\ncontext_keywords :: ", self.collected_context_keywords, '\n')
|
||||
|
||||
internal_monologue = self.internal_monologue.execute(current_prompt=plan, project_name=project_name)
|
||||
print("\ninternal_monologue :: ", internal_monologue, '\n')
|
||||
|
||||
new_state = self.agent_state.new_state()
|
||||
new_state["internal_monologue"] = internal_monologue
|
||||
self.agent_state.add_to_current_state(project_name, new_state)
|
||||
|
||||
research = self.researcher.execute(plan, self.collected_context_keywords, project_name=project_name)
|
||||
print("\nresearch :: ", research, '\n')
|
||||
|
||||
queries = research["queries"]
|
||||
queries_combined = ", ".join(queries)
|
||||
ask_user = research["ask_user"]
|
||||
|
||||
if queries or ask_user != "":
|
||||
self.project_manager.add_message_from_devika(
|
||||
project_name,
|
||||
f"I am browsing the web to research the following queries: {queries_combined}."
|
||||
f"\n If I need anything, I will make sure to ask you."
|
||||
)
|
||||
if not queries:
|
||||
self.project_manager.add_message_from_devika(
|
||||
project_name,
|
||||
"I think I can proceed without searching the web."
|
||||
)
|
||||
|
||||
ask_user_prompt = "Nothing from the user."
|
||||
|
||||
if ask_user != "" and ask_user is not None:
|
||||
self.project_manager.add_message_from_devika(project_name, ask_user)
|
||||
self.agent_state.set_agent_active(project_name, False)
|
||||
got_user_query = False
|
||||
|
||||
while not got_user_query:
|
||||
self.logger.info("Waiting for user query...")
|
||||
|
||||
latest_message_from_user = self.project_manager.get_latest_message_from_user(project_name)
|
||||
validate_last_message_is_from_user = self.project_manager.validate_last_message_is_from_user(
|
||||
project_name)
|
||||
|
||||
if latest_message_from_user and validate_last_message_is_from_user:
|
||||
ask_user_prompt = latest_message_from_user["message"]
|
||||
got_user_query = True
|
||||
self.project_manager.add_message_from_devika(project_name, "Thanks! 🙌")
|
||||
time.sleep(5)
|
||||
|
||||
self.agent_state.set_agent_active(project_name, True)
|
||||
|
||||
if queries:
|
||||
search_results = self.search_queries(queries, project_name)
|
||||
|
||||
else:
|
||||
search_results = {}
|
||||
|
||||
code = self.coder.execute(
|
||||
step_by_step_plan=plan,
|
||||
user_context=ask_user_prompt,
|
||||
search_results=search_results,
|
||||
project_name=project_name
|
||||
)
|
||||
print("\ncode :: ", code, '\n')
|
||||
|
||||
self.coder.save_code_to_project(code, project_name)
|
||||
|
||||
self.agent_state.set_agent_active(project_name, False)
|
||||
self.agent_state.set_agent_completed(project_name, True)
|
||||
self.project_manager.add_message_from_devika(
|
||||
project_name,
|
||||
"I have completed the my task. \n"
|
||||
"if you would like me to do anything else, please let me know. \n"
|
||||
)
|
1
src/agents/answer/__init__.py
Normal file
1
src/agents/answer/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
from .answer import Answer
|
42
src/agents/answer/answer.py
Normal file
42
src/agents/answer/answer.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
import json
|
||||
|
||||
from jinja2 import Environment, BaseLoader
|
||||
|
||||
from src.services.utils import retry_wrapper, validate_responses
|
||||
from src.config import Config
|
||||
from src.llm import LLM
|
||||
|
||||
PROMPT = open("src/agents/answer/prompt.jinja2", "r").read().strip()
|
||||
|
||||
class Answer:
|
||||
def __init__(self, base_model: str):
|
||||
config = Config()
|
||||
self.project_dir = config.get_projects_dir()
|
||||
|
||||
self.llm = LLM(model_id=base_model)
|
||||
|
||||
def render(
|
||||
self, conversation: str, code_markdown: str
|
||||
) -> str:
|
||||
env = Environment(loader=BaseLoader())
|
||||
template = env.from_string(PROMPT)
|
||||
return template.render(
|
||||
conversation=conversation,
|
||||
code_markdown=code_markdown
|
||||
)
|
||||
|
||||
@validate_responses
|
||||
def validate_response(self, response: str):
|
||||
if "response" not in response:
|
||||
return False
|
||||
else:
|
||||
return response["response"]
|
||||
|
||||
@retry_wrapper
|
||||
def execute(self, conversation: list, code_markdown: str, project_name: str) -> str:
|
||||
prompt = self.render(conversation, code_markdown)
|
||||
response = self.llm.inference(prompt, project_name)
|
||||
|
||||
valid_response = self.validate_response(response)
|
||||
|
||||
return valid_response
|
27
src/agents/answer/prompt.jinja2
Normal file
27
src/agents/answer/prompt.jinja2
Normal file
|
@ -0,0 +1,27 @@
|
|||
You are Devika, an AI Software Engineer. You have been talking to the user and this is your exchange so far:
|
||||
|
||||
```
|
||||
{% for message in conversation %}
|
||||
{{ message }}
|
||||
{% endfor %}
|
||||
```
|
||||
|
||||
Full Code:
|
||||
~~~
|
||||
{{ code_markdown }}
|
||||
~~~
|
||||
|
||||
User's last message: {{ conversation[-1] }}
|
||||
|
||||
Your response should be in the following format:
|
||||
```
|
||||
{
|
||||
"response": "Your human-like response to the user's last message."
|
||||
}
|
||||
```
|
||||
|
||||
Rules:
|
||||
- Read the full context, including the code (if any) carefully to answer the user's prompt.
|
||||
- Your response can be as long as needed, but it should be concise and to the point.
|
||||
|
||||
Any response other than the JSON format will be rejected by the system.
|
1
src/agents/coder/__init__.py
Normal file
1
src/agents/coder/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
from .coder import Coder
|
134
src/agents/coder/coder.py
Normal file
134
src/agents/coder/coder.py
Normal file
|
@ -0,0 +1,134 @@
|
|||
import os
|
||||
import time
|
||||
|
||||
from jinja2 import Environment, BaseLoader
|
||||
from typing import List, Dict, Union
|
||||
|
||||
from src.config import Config
|
||||
from src.llm import LLM
|
||||
from src.state import AgentState
|
||||
from src.logger import Logger
|
||||
from src.services.utils import retry_wrapper
|
||||
from src.socket_instance import emit_agent
|
||||
|
||||
PROMPT = open("src/agents/coder/prompt.jinja2", "r").read().strip()
|
||||
|
||||
class Coder:
|
||||
def __init__(self, base_model: str):
|
||||
config = Config()
|
||||
self.project_dir = config.get_projects_dir()
|
||||
self.logger = Logger()
|
||||
self.llm = LLM(model_id=base_model)
|
||||
|
||||
def render(
|
||||
self, step_by_step_plan: str, user_context: str, search_results: dict
|
||||
) -> str:
|
||||
env = Environment(loader=BaseLoader())
|
||||
template = env.from_string(PROMPT)
|
||||
return template.render(
|
||||
step_by_step_plan=step_by_step_plan,
|
||||
user_context=user_context,
|
||||
search_results=search_results,
|
||||
)
|
||||
|
||||
def validate_response(self, response: str) -> Union[List[Dict[str, str]], bool]:
|
||||
response = response.strip()
|
||||
|
||||
self.logger.debug(f"Response from the model: {response}")
|
||||
|
||||
if "~~~" not in response:
|
||||
return False
|
||||
|
||||
response = response.split("~~~", 1)[1]
|
||||
response = response[:response.rfind("~~~")]
|
||||
response = response.strip()
|
||||
|
||||
result = []
|
||||
current_file = None
|
||||
current_code = []
|
||||
code_block = False
|
||||
|
||||
for line in response.split("\n"):
|
||||
if line.startswith("File: "):
|
||||
if current_file and current_code:
|
||||
result.append({"file": current_file, "code": "\n".join(current_code)})
|
||||
current_file = line.split(":")[1].strip()
|
||||
current_code = []
|
||||
code_block = False
|
||||
elif line.startswith("```"):
|
||||
code_block = not code_block
|
||||
else:
|
||||
current_code.append(line)
|
||||
|
||||
if current_file and current_code:
|
||||
result.append({"file": current_file, "code": "\n".join(current_code)})
|
||||
|
||||
return result
|
||||
|
||||
def save_code_to_project(self, response: List[Dict[str, str]], project_name: str):
|
||||
file_path_dir = None
|
||||
project_name = project_name.lower().replace(" ", "-")
|
||||
|
||||
for file in response:
|
||||
file_path = os.path.join(self.project_dir, project_name, file['file'])
|
||||
file_path_dir = os.path.dirname(file_path)
|
||||
os.makedirs(file_path_dir, exist_ok=True)
|
||||
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(file["code"])
|
||||
|
||||
return file_path_dir
|
||||
|
||||
def get_project_path(self, project_name: str):
|
||||
project_name = project_name.lower().replace(" ", "-")
|
||||
return f"{self.project_dir}/{project_name}"
|
||||
|
||||
def response_to_markdown_prompt(self, response: List[Dict[str, str]]) -> str:
|
||||
response = "\n".join([f"File: `{file['file']}`:\n```\n{file['code']}\n```" for file in response])
|
||||
return f"~~~\n{response}\n~~~"
|
||||
|
||||
def emulate_code_writing(self, code_set: list, project_name: str):
|
||||
files = []
|
||||
for current_file in code_set:
|
||||
file = current_file["file"]
|
||||
code = current_file["code"]
|
||||
|
||||
current_state = AgentState().get_latest_state(project_name)
|
||||
new_state = AgentState().new_state()
|
||||
new_state["browser_session"] = current_state["browser_session"] # keep the browser session
|
||||
new_state["internal_monologue"] = "Writing code..."
|
||||
new_state["terminal_session"]["title"] = f"Editing {file}"
|
||||
new_state["terminal_session"]["command"] = f"vim {file}"
|
||||
new_state["terminal_session"]["output"] = code
|
||||
files.append({
|
||||
"file": file,
|
||||
"code": code
|
||||
})
|
||||
AgentState().add_to_current_state(project_name, new_state)
|
||||
time.sleep(2)
|
||||
emit_agent("code", {
|
||||
"files": files,
|
||||
"from": "coder"
|
||||
})
|
||||
|
||||
@retry_wrapper
|
||||
def execute(
|
||||
self,
|
||||
step_by_step_plan: str,
|
||||
user_context: str,
|
||||
search_results: dict,
|
||||
project_name: str
|
||||
) -> str:
|
||||
prompt = self.render(step_by_step_plan, user_context, search_results)
|
||||
response = self.llm.inference(prompt, project_name)
|
||||
|
||||
valid_response = self.validate_response(response)
|
||||
|
||||
if not valid_response:
|
||||
return False
|
||||
|
||||
print(valid_response)
|
||||
|
||||
self.emulate_code_writing(valid_response, project_name)
|
||||
|
||||
return valid_response
|
68
src/agents/coder/prompt.jinja2
Normal file
68
src/agents/coder/prompt.jinja2
Normal file
|
@ -0,0 +1,68 @@
|
|||
Project Step-by-step Plan:
|
||||
```
|
||||
{{ step_by_step_plan }}
|
||||
```
|
||||
|
||||
Context From User:
|
||||
```
|
||||
{{ user_context }}
|
||||
```
|
||||
|
||||
Context From Knowledge Base:
|
||||
|
||||
{% if not search_results %}
|
||||
No context found.
|
||||
{% else %}
|
||||
{% for query, result in search_results.items() %}
|
||||
Query: {{ query }}
|
||||
Result:
|
||||
```
|
||||
{{ result }}
|
||||
```
|
||||
---
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
|
||||
Read the step-by-step plan carefully. Think step-by-step. Learn relevant information from the knowledge base context. Then write the code to implement the step-by-step plan.
|
||||
|
||||
Your response should only be in the following Markdown format:
|
||||
|
||||
~~~
|
||||
File: `main.py`:
|
||||
```py
|
||||
print("Example")
|
||||
```
|
||||
|
||||
File: `src/main.rs`:
|
||||
```rs
|
||||
fn main() {
|
||||
println!("Example");
|
||||
}
|
||||
```
|
||||
|
||||
File: `nested/directory/example/code.py`:
|
||||
```py
|
||||
print("Example")
|
||||
```
|
||||
|
||||
File: `README.md`:
|
||||
```md
|
||||
# Example
|
||||
|
||||
This is an example.
|
||||
```
|
||||
~~~
|
||||
|
||||
Rules:
|
||||
- You should write clean and documented code.
|
||||
- The code should work on the first try without any errors or bugs.
|
||||
- Choose the library or dependency you know best.
|
||||
- The example code in the knowledge base might use something other than what the step-by-step plan calls for. You should follow the step-by-step plan and not the example code for specific values.
|
||||
- The extension used for the Markdown code blocks should be accurate.
|
||||
- Nested directories should be specified in the Markdown filename; the code structure should be accurate. If the code needs to be nested to work, then it should be nested.
|
||||
- You need to include the files required for the code to run, like requirements.txt, Cargo.toml, etc.
|
||||
- Files like Cargo.toml are mandatory to include; the project won't work without them.
|
||||
|
||||
Any response other than this format will be rejected. You should not refuse to complete the task, you should try your absolute best and if there's any implementation detail that's impossible to complete, you should write a comment in the code explaining why it's impossible to complete. The refusal is only a last resort, it should never happen.
|
||||
|
||||
Your response should start with "~~~" and end with "~~~" just like the example format provided. Never provide any explanation or context inside the response, only the filenames and the code in the format provided. Do not leave any "Note".
|
1
src/agents/decision/__init__.py
Normal file
1
src/agents/decision/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
from .decision import Decision
|
34
src/agents/decision/decision.py
Normal file
34
src/agents/decision/decision.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
import json
|
||||
|
||||
from jinja2 import Environment, BaseLoader
|
||||
|
||||
from src.services.utils import retry_wrapper, validate_responses
|
||||
from src.llm import LLM
|
||||
|
||||
PROMPT = open("src/agents/decision/prompt.jinja2").read().strip()
|
||||
|
||||
class Decision:
|
||||
def __init__(self, base_model: str):
|
||||
self.llm = LLM(model_id=base_model)
|
||||
|
||||
def render(self, prompt: str) -> str:
|
||||
env = Environment(loader=BaseLoader())
|
||||
template = env.from_string(PROMPT)
|
||||
return template.render(prompt=prompt)
|
||||
|
||||
@validate_responses
|
||||
def validate_response(self, response: str):
|
||||
for item in response:
|
||||
if "function" not in item or "args" not in item or "reply" not in item:
|
||||
return False
|
||||
|
||||
return response
|
||||
|
||||
@retry_wrapper
|
||||
def execute(self, prompt: str, project_name: str) -> str:
|
||||
rendered_prompt = self.render(prompt)
|
||||
response = self.llm.inference(rendered_prompt, project_name)
|
||||
|
||||
valid_response = self.validate_response(response)
|
||||
|
||||
return valid_response
|
85
src/agents/decision/prompt.jinja2
Normal file
85
src/agents/decision/prompt.jinja2
Normal file
|
@ -0,0 +1,85 @@
|
|||
You are Devika, an AI software engineer. You are given the following prompt from the user:
|
||||
|
||||
```
|
||||
{{ prompt }}
|
||||
```
|
||||
|
||||
From this prompt, you have to chain function calls from the following options to accomplish the user's request in the most effective way.
|
||||
|
||||
JSON Functions:
|
||||
|
||||
## `git_clone`:
|
||||
Description: The user's request includes a GitHub URL, and you have to clone the repository to the user's local machine.
|
||||
Usage:
|
||||
```
|
||||
{
|
||||
"function": "git_clone",
|
||||
"args": {
|
||||
"url": "<GitHub URL from the user>"
|
||||
},
|
||||
"reply": "<Inform the user what you're doing here in a human-like response>"
|
||||
}
|
||||
```
|
||||
|
||||
## `generate_pdf_document`:
|
||||
Description: The user's request is to create a document for the following: Report, Documentation, Project Technical Document, Workshop Material, Homework, Assignment, or any other document.
|
||||
Usage:
|
||||
```
|
||||
{
|
||||
"function": "generate_pdf_document",
|
||||
"args": {
|
||||
"user_prompt": "<Write the user's prompt but even more verbose and detailed>"
|
||||
},
|
||||
"reply": "<Inform the user what you're doing here in a human-like response>"
|
||||
}
|
||||
```
|
||||
|
||||
## `browser_interaction`:
|
||||
Description: The user's request is to interact with a website. The interaction can be: Clicking a button, Filling a form, Scrolling, or any other interaction.
|
||||
The user might be asking you to post something on Twitter or Reddit or even searching something on Google.
|
||||
Usage:
|
||||
```
|
||||
{
|
||||
"function": "browser_interaction",
|
||||
"args": {
|
||||
"user_prompt": "<Write the user's prompt but even more verbose and detailed>"
|
||||
},
|
||||
"reply": "<Inform the user what you're doing here in a human-like response>"
|
||||
}
|
||||
```
|
||||
|
||||
## `coding_project`
|
||||
Description: The user's request is to create a coding project. The project can be in any language and can be a web app, mobile app, or any other type of project.
|
||||
Usage:
|
||||
```
|
||||
{
|
||||
"function": "coding_project",
|
||||
"args": {
|
||||
"user_prompt": "<Write the user's prompt but even more verbose and detailed>"
|
||||
},
|
||||
"reply": "<Inform the user what you're doing here in a human-like response>"
|
||||
}
|
||||
```
|
||||
|
||||
Response Format:
|
||||
|
||||
```
|
||||
[
|
||||
{
|
||||
"function": "git_clone",
|
||||
"args": {
|
||||
"url": "https://github.com/username/repo"
|
||||
},
|
||||
"reply": "<Inform the user what you're doing here in a human-like response>"
|
||||
},
|
||||
{
|
||||
"function": "generate_pdf_document",
|
||||
"args": {
|
||||
"user_prompt": "I want to create a report on the project"
|
||||
},
|
||||
"reply": "<Inform the user what you're doing here in a human-like response>"
|
||||
|
||||
]
|
||||
```
|
||||
|
||||
Your response should only be the JSON array of function calls with their arguments and nothing else. Any other format of response will be rejected by the system.
|
1
src/agents/feature/__init__.py
Normal file
1
src/agents/feature/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
from .feature import Feature
|
128
src/agents/feature/feature.py
Normal file
128
src/agents/feature/feature.py
Normal file
|
@ -0,0 +1,128 @@
|
|||
import os
|
||||
import time
|
||||
|
||||
from jinja2 import Environment, BaseLoader
|
||||
from typing import List, Dict, Union
|
||||
|
||||
from src.config import Config
|
||||
from src.llm import LLM
|
||||
from src.state import AgentState
|
||||
from src.services.utils import retry_wrapper
|
||||
from src.socket_instance import emit_agent
|
||||
|
||||
PROMPT = open("src/agents/feature/prompt.jinja2", "r").read().strip()
|
||||
|
||||
|
||||
class Feature:
|
||||
def __init__(self, base_model: str):
|
||||
config = Config()
|
||||
self.project_dir = config.get_projects_dir()
|
||||
|
||||
self.llm = LLM(model_id=base_model)
|
||||
|
||||
def render(
|
||||
self,
|
||||
conversation: list,
|
||||
code_markdown: str,
|
||||
system_os: str
|
||||
) -> str:
|
||||
env = Environment(loader=BaseLoader())
|
||||
template = env.from_string(PROMPT)
|
||||
return template.render(
|
||||
conversation=conversation,
|
||||
code_markdown=code_markdown,
|
||||
system_os=system_os
|
||||
)
|
||||
|
||||
def validate_response(self, response: str) -> Union[List[Dict[str, str]], bool]:
|
||||
response = response.strip()
|
||||
|
||||
response = response.split("~~~", 1)[1]
|
||||
response = response[:response.rfind("~~~")]
|
||||
response = response.strip()
|
||||
|
||||
result = []
|
||||
current_file = None
|
||||
current_code = []
|
||||
code_block = False
|
||||
|
||||
for line in response.split("\n"):
|
||||
if line.startswith("File: "):
|
||||
if current_file and current_code:
|
||||
result.append({"file": current_file, "code": "\n".join(current_code)})
|
||||
current_file = line.split("`")[1].strip()
|
||||
current_code = []
|
||||
code_block = False
|
||||
elif line.startswith("```"):
|
||||
code_block = not code_block
|
||||
else:
|
||||
current_code.append(line)
|
||||
|
||||
if current_file and current_code:
|
||||
result.append({"file": current_file, "code": "\n".join(current_code)})
|
||||
|
||||
return result
|
||||
|
||||
def save_code_to_project(self, response: List[Dict[str, str]], project_name: str):
|
||||
file_path_dir = None
|
||||
project_name = project_name.lower().replace(" ", "-")
|
||||
|
||||
for file in response:
|
||||
file_path = os.path.join(self.project_dir, project_name, file['file'])
|
||||
file_path_dir = os.path.dirname(file_path)
|
||||
os.makedirs(file_path_dir, exist_ok=True)
|
||||
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(file["code"])
|
||||
|
||||
return file_path_dir
|
||||
|
||||
def get_project_path(self, project_name: str):
|
||||
project_name = project_name.lower().replace(" ", "-")
|
||||
return f"{self.project_dir}/{project_name}"
|
||||
|
||||
def response_to_markdown_prompt(self, response: List[Dict[str, str]]) -> str:
|
||||
response = "\n".join([f"File: `{file['file']}`:\n```\n{file['code']}\n```" for file in response])
|
||||
return f"~~~\n{response}\n~~~"
|
||||
|
||||
def emulate_code_writing(self, code_set: list, project_name: str):
|
||||
files = []
|
||||
for file in code_set:
|
||||
filename = file["file"]
|
||||
code = file["code"]
|
||||
|
||||
new_state = AgentState().new_state()
|
||||
new_state["internal_monologue"] = "Writing code..."
|
||||
new_state["terminal_session"]["title"] = f"Editing {filename}"
|
||||
new_state["terminal_session"]["command"] = f"vim {filename}"
|
||||
new_state["terminal_session"]["output"] = code
|
||||
files.append({
|
||||
"file": filename,
|
||||
"code": code,
|
||||
})
|
||||
AgentState().add_to_current_state(project_name, new_state)
|
||||
time.sleep(1)
|
||||
emit_agent("code", {
|
||||
"files": files,
|
||||
"from": "feature"
|
||||
})
|
||||
|
||||
@retry_wrapper
|
||||
def execute(
|
||||
self,
|
||||
conversation: list,
|
||||
code_markdown: str,
|
||||
system_os: str,
|
||||
project_name: str
|
||||
) -> str:
|
||||
prompt = self.render(conversation, code_markdown, system_os)
|
||||
response = self.llm.inference(prompt, project_name)
|
||||
|
||||
valid_response = self.validate_response(response)
|
||||
|
||||
if not valid_response:
|
||||
return False
|
||||
|
||||
self.emulate_code_writing(valid_response, project_name)
|
||||
|
||||
return valid_response
|
57
src/agents/feature/prompt.jinja2
Normal file
57
src/agents/feature/prompt.jinja2
Normal file
|
@ -0,0 +1,57 @@
|
|||
You are Devika, an AI Software Engineer. You have been talking to the user and this is the exchange so far:
|
||||
|
||||
```
|
||||
{% for message in conversation %}
|
||||
{{ message }}
|
||||
{% endfor %}
|
||||
```
|
||||
|
||||
Full Code:
|
||||
~~~
|
||||
{{ code_markdown }}
|
||||
~~~
|
||||
|
||||
User wants the following feature to be implemented: {{ conversation[-1] }}
|
||||
|
||||
System Operating System: {{ system_os }}
|
||||
|
||||
Read the user's feature request carefully. Think step-by-step.
|
||||
|
||||
Rules:
|
||||
- You should write clean and documented code.
|
||||
- The code should work on the first try without any errors or bugs.
|
||||
- Choose the library or dependency you know best.
|
||||
- The extension used for the Markdown code blocks should be accurate.
|
||||
- You should respond with the complete rewritten code with no implementation detail left out. No brevity allowed; the user needs to be able to copy-paste your response as a whole.
|
||||
|
||||
Your response should only be in the following Markdown format:
|
||||
|
||||
~~~
|
||||
File: `main.py`:
|
||||
```py
|
||||
print("Example")
|
||||
```
|
||||
|
||||
File: `src/example.rs`:
|
||||
```rs
|
||||
fn example() {
|
||||
println!("Example");
|
||||
}
|
||||
```
|
||||
|
||||
File: `nested/directory/example/code.py`:
|
||||
```py
|
||||
print("Example")
|
||||
```
|
||||
|
||||
File: `README.md`:
|
||||
```md
|
||||
# Example
|
||||
|
||||
This is an example.
|
||||
```
|
||||
~~~
|
||||
|
||||
Any response other than this format will be rejected. You should not refuse to complete the task, you should try your absolute best and if there's any implementation detail that's impossible to complete, you should write a comment in the code explaining why it's impossible to complete. The refusal is only a last resort, it should never happen.
|
||||
|
||||
Your response should start with "~~~" and end with "~~~" just like the example format provided. Never provide any explanation or context inside the response, only the filenames and the code in the format provided. Do not leave any "Note".
|
1
src/agents/formatter/__init__.py
Normal file
1
src/agents/formatter/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
from .formatter import Formatter
|
22
src/agents/formatter/formatter.py
Normal file
22
src/agents/formatter/formatter.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
from jinja2 import Environment, BaseLoader
|
||||
|
||||
from src.llm import LLM
|
||||
|
||||
PROMPT = open("src/agents/formatter/prompt.jinja2").read().strip()
|
||||
|
||||
class Formatter:
|
||||
def __init__(self, base_model: str):
|
||||
self.llm = LLM(model_id=base_model)
|
||||
|
||||
def render(self, raw_text: str) -> str:
|
||||
env = Environment(loader=BaseLoader())
|
||||
template = env.from_string(PROMPT)
|
||||
return template.render(raw_text=raw_text)
|
||||
|
||||
def validate_response(self, response: str) -> bool:
|
||||
return True
|
||||
|
||||
def execute(self, raw_text: str, project_name: str) -> str:
|
||||
raw_text = self.render(raw_text)
|
||||
response = self.llm.inference(raw_text, project_name)
|
||||
return response
|
13 src/agents/formatter/prompt.jinja2 Normal file
@@ -0,0 +1,13 @@
```
{{ raw_text }}
```

You are provided with raw text extracted from a PDF render of a web page. This web page could be a blog, documentation, or any other type of web page.

Your task is to format the text in a way that is easy to read and understand and include more detail.

You are essentially a raw-text-to-clean-Markdown converter. You should remove any unnecessary text, such as text from navigation links or the web page header or footer, which we do not need.

If it's documentation with code, focus more on the code examples and the explanation of the code; keep your responses short to save context window.

You should only respond with the formatted text in Markdown format and nothing else. Start your response with "```" and end with "```".
1 src/agents/internal_monologue/__init__.py Normal file
@@ -0,0 +1 @@
from .internal_monologue import InternalMonologue
34 src/agents/internal_monologue/internal_monologue.py Normal file
@@ -0,0 +1,34 @@
import json

from jinja2 import Environment, BaseLoader

from src.llm import LLM
from src.services.utils import retry_wrapper, validate_responses

PROMPT = open("src/agents/internal_monologue/prompt.jinja2").read().strip()

class InternalMonologue:
    def __init__(self, base_model: str):
        self.llm = LLM(model_id=base_model)

    def render(self, current_prompt: str) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(current_prompt=current_prompt)

    @validate_responses
    def validate_response(self, response: str):
        # `validate_responses` hands this method the parsed JSON object.
        if "internal_monologue" not in response:
            return False
        return response["internal_monologue"]

    @retry_wrapper
    def execute(self, current_prompt: str, project_name: str) -> str:
        rendered_prompt = self.render(current_prompt)
        response = self.llm.inference(rendered_prompt, project_name)
        valid_response = self.validate_response(response)
        return valid_response
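A short usage sketch, assuming retry_wrapper re-invokes execute when validation fails (all argument values are illustrative):

monologue = InternalMonologue(base_model="gpt-4")  # model id is illustrative
thought = monologue.execute("Searching the web for Flask documentation...", project_name="demo-project")
print(thought)  # e.g. "Let me look that up real quick."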
21 src/agents/internal_monologue/prompt.jinja2 Normal file
@@ -0,0 +1,21 @@
You are Devika, an AI Software Engineer.

One of your AI agent modules is currently working through the following prompt:

```
{{ current_prompt }}
```

To show the user what you're thinking about or doing, respond with a short human-like response verbalizing your internal monologue.

Your response should be in the following JSON format:

```
{
    "internal_monologue": "<YOUR INTERNAL MONOLOGUE>"
}
```

TIP: Make the internal monologue very human-like and conversational. It should be very short and concise.

Only the provided JSON response format is accepted. Any other response format will be rejected.
1 src/agents/patcher/__init__.py Normal file
@@ -0,0 +1 @@
from .patcher import Patcher
138 src/agents/patcher/patcher.py Normal file
@@ -0,0 +1,138 @@
import os
import time

from jinja2 import Environment, BaseLoader
from typing import List, Dict, Union
from src.socket_instance import emit_agent

from src.config import Config
from src.llm import LLM
from src.state import AgentState
from src.services.utils import retry_wrapper

PROMPT = open("src/agents/patcher/prompt.jinja2", "r").read().strip()

class Patcher:
    def __init__(self, base_model: str):
        config = Config()
        self.project_dir = config.get_projects_dir()

        self.llm = LLM(model_id=base_model)

    def render(
        self,
        conversation: list,
        code_markdown: str,
        commands: list,
        error: str,
        system_os: str
    ) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            conversation=conversation,
            code_markdown=code_markdown,
            commands=commands,
            error=error,
            system_os=system_os
        )

    def validate_response(self, response: str) -> Union[List[Dict[str, str]], bool]:
        response = response.strip()

        response = response.split("~~~", 1)[1]
        response = response[:response.rfind("~~~")]
        response = response.strip()

        result = []
        current_file = None
        current_code = []
        code_block = False

        for line in response.split("\n"):
            if line.startswith("File: "):
                if current_file and current_code:
                    result.append({"file": current_file, "code": "\n".join(current_code)})
                current_file = line.split("`")[1].strip()
                current_code = []
                code_block = False
            elif line.startswith("```"):
                code_block = not code_block
            else:
                current_code.append(line)

        if current_file and current_code:
            result.append({"file": current_file, "code": "\n".join(current_code)})

        return result

    def save_code_to_project(self, response: List[Dict[str, str]], project_name: str):
        file_path_dir = None
        project_name = project_name.lower().replace(" ", "-")

        for file in response:
            file_path = os.path.join(self.project_dir, project_name, file['file'])
            file_path_dir = os.path.dirname(file_path)
            os.makedirs(file_path_dir, exist_ok=True)

            with open(file_path, "w", encoding="utf-8") as f:
                f.write(file["code"])

        return file_path_dir

    def get_project_path(self, project_name: str):
        project_name = project_name.lower().replace(" ", "-")
        return f"{self.project_dir}/{project_name}"

    def response_to_markdown_prompt(self, response: List[Dict[str, str]]) -> str:
        response = "\n".join([f"File: `{file['file']}`:\n```\n{file['code']}\n```" for file in response])
        return f"~~~\n{response}\n~~~"

    def emulate_code_writing(self, code_set: list, project_name: str):
        files = []
        for current_file in code_set:
            file = current_file["file"]
            code = current_file["code"]

            new_state = AgentState().new_state()
            new_state["internal_monologue"] = "Writing code..."
            new_state["terminal_session"]["title"] = f"Editing {file}"
            new_state["terminal_session"]["command"] = f"vim {file}"
            new_state["terminal_session"]["output"] = code
            files.append({
                "file": file,
                "code": code
            })
            AgentState().add_to_current_state(project_name, new_state)
            time.sleep(1)
        emit_agent("code", {
            "files": files,
            "from": "patcher"
        })

    @retry_wrapper
    def execute(
        self,
        conversation: str,
        code_markdown: str,
        commands: list,
        error: str,
        system_os: str,
        project_name: str
    ) -> str:
        prompt = self.render(
            conversation,
            code_markdown,
            commands,
            error,
            system_os
        )
        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        if not valid_response:
            return False

        self.emulate_code_writing(valid_response, project_name)

        return valid_response
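A round-trip sketch: parse a model reply into file/code pairs, then re-serialize them with response_to_markdown_prompt (model_output is a hypothetical variable holding an LLM reply in the expected format):

patcher = Patcher(base_model="gpt-4")              # model id is illustrative
files = patcher.validate_response(model_output)    # [{"file": ..., "code": ...}, ...]
print(patcher.response_to_markdown_prompt(files))  # the same files, re-serialized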
72 src/agents/patcher/prompt.jinja2 Normal file
@@ -0,0 +1,72 @@
You are Devika, an AI Software Engineer. You have been talking to the user and this is the exchange so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

Full Code:
~~~
{{ code_markdown }}
~~~

{% if commands %}
You tried to execute the following commands to run this project:
```
{% for command in commands %}
$ {{ command }}
{% endfor %}
```
{% endif %}

{% if error %}
But it resulted in the following error:
```
$ {{ commands[-1] }}
{{ error }}
```
{% endif %}

System Operating System: {{ system_os }}

Read the encountered bug carefully and reason with the code to identify the problem. Think step-by-step.

Rules:
- You should write clean and documented code.
- The code should work on the first try without any errors or bugs.
- Choose the library or dependency you know best.
- The extension used for the Markdown code blocks should be accurate.
- You should respond with the complete rewritten code with no implementation detail left out. No brevity allowed; the user needs to be able to copy-paste your response as a whole.

Your response should only be in the following Markdown format:

~~~
File: `main.py`:
```py
print("Example")
```

File: `src/example.rs`:
```rs
fn example() {
    println!("Example");
}
```

File: `nested/directory/example/code.py`:
```py
print("Example")
```

File: `README.md`:
```md
# Example

This is an example.
```
~~~

Any response other than this format will be rejected. You should not refuse to complete the task; you should try your absolute best, and if there is any implementation detail that is impossible to complete, you should write a comment in the code explaining why. Refusal is only a last resort; it should never happen.

Your response should start with "~~~" and end with "~~~", just like the example format provided. Never provide any explanation or context inside the response, only the filenames and the code in the format provided. Do not leave any "Note".
1 src/agents/planner/__init__.py Normal file
@@ -0,0 +1 @@
from .planner import Planner
71 src/agents/planner/planner.py Normal file
@@ -0,0 +1,71 @@
from jinja2 import Environment, BaseLoader

from src.llm import LLM

PROMPT = open("src/agents/planner/prompt.jinja2").read().strip()

class Planner:
    def __init__(self, base_model: str):
        self.llm = LLM(model_id=base_model)

    def render(self, prompt: str) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(prompt=prompt)

    def validate_response(self, response: str) -> bool:
        return True

    def parse_response(self, response: str):
        result = {
            "project": "",
            "reply": "",
            "focus": "",
            "plans": {},
            "summary": ""
        }

        current_section = None
        current_step = None

        for line in response.split("\n"):
            line = line.strip()

            if line.startswith("Project Name:"):
                current_section = "project"
                result["project"] = line.split(":", 1)[1].strip()
            elif line.startswith("Your Reply to the Human Prompter:"):
                current_section = "reply"
                result["reply"] = line.split(":", 1)[1].strip()
            elif line.startswith("Current Focus:"):
                current_section = "focus"
                result["focus"] = line.split(":", 1)[1].strip()
            elif line.startswith("Plan:"):
                current_section = "plans"
            elif line.startswith("Summary:"):
                current_section = "summary"
                result["summary"] = line.split(":", 1)[1].strip()
            elif current_section == "reply":
                result["reply"] += " " + line
            elif current_section == "focus":
                result["focus"] += " " + line
            elif current_section == "plans":
                if line.startswith("- [ ] Step"):
                    current_step = line.split(":")[0].strip().split(" ")[-1]
                    result["plans"][int(current_step)] = line.split(":", 1)[1].strip()
                elif current_step:
                    result["plans"][int(current_step)] += " " + line
            elif current_section == "summary":
                result["summary"] += " " + line.replace("```", "")

        result["project"] = result["project"].strip()
        result["reply"] = result["reply"].strip()
        result["focus"] = result["focus"].strip()
        result["summary"] = result["summary"].strip()

        return result

    def execute(self, prompt: str, project_name: str) -> str:
        prompt = self.render(prompt)
        response = self.llm.inference(prompt, project_name)
        return response
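A short illustration of what parse_response produces for a well-formed plan (the plan text and model id are made-up examples):

planner = Planner(base_model="gpt-4")
plan_text = """Project Name: Weather CLI
Your Reply to the Human Prompter: Creating a plan for a small weather CLI.
Current Focus: Fetch and display current weather.
Plan:
- [ ] Step 1: Query a weather API for the given city.
- [ ] Step 2: Print the temperature and conditions.
Summary: Two small steps; needs an API key.
"""
parsed = planner.parse_response(plan_text)
# parsed["project"] == "Weather CLI"
# parsed["plans"]  == {1: "Query a weather API for the given city.",
#                      2: "Print the temperature and conditions."}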
36 src/agents/planner/prompt.jinja2 Normal file
@@ -0,0 +1,36 @@
You are Devika, an AI Software Engineer.

The user asked: {{ prompt }}

Based on the user's request, create a step-by-step plan to accomplish the task.

Follow this format for your response:

```
Project Name: <Write an apt project name, no longer than 5 words>

Your Reply to the Human Prompter: <short human-like response to the prompt stating how you are creating the plan; do not start with "As an AI".>

Current Focus: Briefly state the main objective or focus area for the plan.

Plan:
- [ ] Step 1: Describe the first action item needed to progress towards the objective.
- [ ] Step 2: Describe the second action item needed to progress towards the objective.
...
- [ ] Step N: Describe the final action item needed to complete the objective.

Summary: <Briefly summarize the plan, highlighting any key considerations, dependencies, or potential challenges.>
```

Each step should be a clear, concise description of a specific task or action required. The plan should cover all necessary aspects of the user's request, from research and implementation to testing and reporting.

Write the plan knowing that you have access to the browser and search engine to accomplish the task.

After listing the steps, provide a brief summary of the plan, highlighting any key considerations, dependencies, or potential challenges.

Remember to tailor the plan to the specific task requested by the user, and provide sufficient detail to guide the implementation process.

If the task is simple and you think you can do it without other assistance, just give one or two simple steps to accomplish it. Don't overcomplicate things when it's not necessary.

Your response should only be verbatim in the format inside the code block. Any other response format will be rejected.
1 src/agents/reporter/__init__.py Normal file
@@ -0,0 +1 @@
from .reporter import Reporter
38 src/agents/reporter/prompt.jinja2 Normal file
@@ -0,0 +1,38 @@
You are Devika, an AI Software Engineer. You have been talking to the user and this is the exchange so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

{% if code_markdown %}
Full Code:
~~~
{{ code_markdown }}
~~~
{% endif %}

User's last message or request: {{ conversation[-1] }}

Your task is to generate an extensive report from all the context in this prompt. The report should be detailed and cover all the necessary information.

The report should be lengthy and detailed. It should be at least 3000 characters long.

Your response should be clean Markdown. The system will automatically convert this Markdown to PDF.

Response format:
```
# Title

...Some text...

# Table of Contents

- [Section 1](#section-1)
- [Section 2](#section-2)

Your detailed report here. Necessary sections will follow below.
```

Any response other than the Markdown format will be rejected by the system. Do not include the "```" at the beginning and end of your response. Just the raw, complete Markdown report.
42 src/agents/reporter/reporter.py Normal file
@@ -0,0 +1,42 @@
import json

from jinja2 import Environment, BaseLoader

from src.services.utils import retry_wrapper
from src.llm import LLM

PROMPT = open("src/agents/reporter/prompt.jinja2").read().strip()

class Reporter:
    def __init__(self, base_model: str):
        self.llm = LLM(model_id=base_model)

    def render(self, conversation: list, code_markdown: str) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            conversation=conversation,
            code_markdown=code_markdown
        )

    def validate_response(self, response: str):
        response = response.strip().replace("```md", "```")

        if response.startswith("```") and response.endswith("```"):
            response = response[3:-3].strip()

        return response

    @retry_wrapper
    def execute(self,
                conversation: list,
                code_markdown: str,
                project_name: str
                ) -> str:
        prompt = self.render(conversation, code_markdown)
        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        return valid_response
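A minimal call sketch (all argument values are illustrative):

report_md = Reporter(base_model="gpt-4").execute(
    conversation=["User: Summarize what was built."],
    code_markdown="",
    project_name="demo-project",
)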
1 src/agents/researcher/__init__.py Normal file
@@ -0,0 +1 @@
from .researcher import Researcher
40 src/agents/researcher/prompt.jinja2 Normal file
@@ -0,0 +1,40 @@
For the provided step-by-step plan, write all the necessary search queries to gather information from the web that the base model doesn't already know.

Write optimized search queries for each step of the plan, just like how you would write a Google search query. Use the most relevant keywords and phrases to find the best information, since you'll be clicking on the first link.

Only ask the user for information if you think it's necessary; otherwise leave the "ask_user" field empty.

Step-by-Step Plan:
{{ step_by_step_plan }}

Only respond in the following JSON format:

```
{
    "queries": ["<QUERY 1>", "<QUERY 2>", "<QUERY 3>", ... ],
    "ask_user": "<ASK INPUT FROM USER IF REQUIRED, OTHERWISE LEAVE EMPTY STRING>"
}
```

Example =>
```
{
    "queries": ["How to do Bing Search via API in Python", "Claude API Documentation Python"],
    "ask_user": "Can you please provide API Keys for Claude, OpenAI, and Firebase?"
}
```

Keywords for Search Query: {{ contextual_keywords }}

Rules:
- Only search for a maximum of 3 queries.
- Do not search for anything that you already know (in your training data, in the base model). For example: you already know how to write a Python Flask web server; it is in your data, so you shouldn't search for how to do that.
- Do not search for information that is not relevant to the task at hand.
- Try to include contextual keywords in your search queries, adding relevant keywords and phrases to make them as specific as possible.
- Only search for documentation; do not search for basic how-tos. Forbidden queries: How to install XYZ, How to set up ABC, etc.
- Do not search for basic queries, only advanced and specific ones. You are allowed to leave the "queries" field empty if no search queries are needed for the step.
- DO NOT EVER SEARCH FOR BASIC QUERIES. ONLY SEARCH FOR ADVANCED QUERIES.
- YOU ARE ALLOWED TO LEAVE THE "queries" FIELD EMPTY IF NO SEARCH QUERIES ARE NEEDED FOR THE STEP.
- You only have to return one JSON object with the queries and ask_user fields. You can't return multiple JSON objects.

Only the provided JSON response format is accepted. Any other response format will be rejected.
46 src/agents/researcher/researcher.py Normal file
@@ -0,0 +1,46 @@
import json
from typing import List

from jinja2 import Environment, BaseLoader

from src.llm import LLM
from src.services.utils import retry_wrapper, validate_responses
from src.browser.search import BingSearch

PROMPT = open("src/agents/researcher/prompt.jinja2").read().strip()


class Researcher:
    def __init__(self, base_model: str):
        self.bing_search = BingSearch()
        self.llm = LLM(model_id=base_model)

    def render(self, step_by_step_plan: str, contextual_keywords: str) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            step_by_step_plan=step_by_step_plan,
            contextual_keywords=contextual_keywords
        )

    @validate_responses
    def validate_response(self, response: str) -> dict | bool:
        # Both keys are required; reject the response if either is missing
        # (using `and` here would raise a KeyError when only one was absent).
        if "queries" not in response or "ask_user" not in response:
            return False
        return {
            "queries": response["queries"],
            "ask_user": response["ask_user"]
        }

    @retry_wrapper
    def execute(self, step_by_step_plan: str, contextual_keywords: List[str], project_name: str) -> dict | bool:
        contextual_keywords_str = ", ".join(map(lambda k: k.capitalize(), contextual_keywords))
        prompt = self.render(step_by_step_plan, contextual_keywords_str)

        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        return valid_response
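A call sketch showing the shape of the result (all values are illustrative):

researcher = Researcher(base_model="gpt-4")
result = researcher.execute(
    step_by_step_plan="- [ ] Step 1: Use the Bing Web Search API from Python.",
    contextual_keywords=["bing", "search", "api"],
    project_name="demo-project",
)
# On success: {"queries": [...], "ask_user": "..."}; on a malformed reply: False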
1 src/agents/runner/__init__.py Normal file
@@ -0,0 +1 @@
from .runner import Runner
37 src/agents/runner/prompt.jinja2 Normal file
@@ -0,0 +1,37 @@
You are Devika, an AI Software Engineer. You have been talking to the user and this is the exchange so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

Full Code:
~~~
{{ code_markdown }}
~~~

User's last message: {{ conversation[-1] }}

System Operating System: {{ system_os }}

Your task is to invoke the system to run this code.

Your response should be in the following format:
```
{
    "commands": [
        "pip3 install -r requirements.txt",
        "python3 main.py"
    ]
}
```

Rules:
- You wrote the code; never address the user directly. You should not say things like "The code you provided"; instead use "The code I wrote".
- Read the full context, including the code (if any), carefully to construct the commands required to run the project.
- The commands should be compatible with the system operating system provided.
- You are inside the project directory, so just run the commands as if the project directory is the working directory.
- Do not "cd" into the project directory. The system is already in the project directory.

Any response other than the JSON format will be rejected by the system.
58 src/agents/runner/rerunner.jinja2 Normal file
@@ -0,0 +1,58 @@
You are Devika, an AI Software Engineer. You have been talking to the user and this is the exchange so far:

```
{% for message in conversation %}
{{ message }}
{% endfor %}
```

Full Code:
~~~
{{ code_markdown }}
~~~

User's last message: {{ conversation[-1] }}

System Operating System: {{ system_os }}

You tried to execute the following commands to run this project:
```
{% for command in commands %}
$ {{ command }}
{% endfor %}
```

But it resulted in the following error:
```
$ {{ commands[-1] }}
{{ error }}
```

Now identify whether this error is caused by the code or the command. If it is caused by the command, provide the correct command to run the project. If it is caused by the code, respond with the patch action response.

Patch Action Response:
```
{
    "action": "patch",
    "response": "<A response like: I encountered an error while running the project. Seems to be <problem>. Let me try fixing it.>"
}
```

Command Fix Response:
```
{
    "action": "command",
    "command": "<Fixed command here>",
    "response": "<A response like: I encountered an error while running the project. Seems to be <problem>. Let me try fixing it.>"
}
```

Rules:
- You wrote the code; never address the user directly. You should not say things like "The code you provided"; instead use "The code I wrote".
- Read the full context, including the code (if any), carefully to construct the commands required to fix the error while running the project.
- The command should be compatible with the system operating system provided.
- You are inside the project directory, so just run the commands as if the project directory is the working directory.
- Do not "cd" into the project directory. The system is already in the project directory.
- Correctly identify whether the error is caused by the code or the command. After identifying the cause, respond with either the "patch" or "command" action.

Any response other than the JSON format will be rejected by the system. ONLY RESPOND WITH THE JSON OBJECT.
222 src/agents/runner/runner.py Normal file
@@ -0,0 +1,222 @@
import time
import json
import os
import subprocess

from jinja2 import Environment, BaseLoader

from src.agents.patcher import Patcher

from src.llm import LLM
from src.state import AgentState
from src.project import ProjectManager
from src.services.utils import retry_wrapper, validate_responses

PROMPT = open("src/agents/runner/prompt.jinja2", "r").read().strip()
RERUNNER_PROMPT = open("src/agents/runner/rerunner.jinja2", "r").read().strip()

class Runner:
    def __init__(self, base_model: str):
        self.base_model = base_model
        self.llm = LLM(model_id=base_model)

    def render(
        self,
        conversation: str,
        code_markdown: str,
        system_os: str
    ) -> str:
        env = Environment(loader=BaseLoader())
        template = env.from_string(PROMPT)
        return template.render(
            conversation=conversation,
            code_markdown=code_markdown,
            system_os=system_os,
        )

    def render_rerunner(
        self,
        conversation: str,
        code_markdown: str,
        system_os: str,
        commands: list,
        error: str
    ):
        env = Environment(loader=BaseLoader())
        template = env.from_string(RERUNNER_PROMPT)
        return template.render(
            conversation=conversation,
            code_markdown=code_markdown,
            system_os=system_os,
            commands=commands,
            error=error
        )

    @validate_responses
    def validate_response(self, response: str):
        if "commands" not in response:
            return False
        return response["commands"]

    @validate_responses
    def validate_rerunner_response(self, response: str):
        # Both keys are required for the rerunner flow.
        if "action" not in response or "response" not in response:
            return False
        return response

    @retry_wrapper
    def run_code(
        self,
        commands: list,
        project_path: str,
        project_name: str,
        conversation: list,
        code_markdown: str,
        system_os: str
    ):
        retries = 0

        for command in commands:
            command_set = command.split(" ")
            command_failed = False

            process = subprocess.run(
                command_set,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=project_path
            )
            command_output = process.stdout.decode('utf-8')
            command_failed = process.returncode != 0
            if command_failed:
                # Failed commands usually report on stderr; include it so the
                # rerunner prompt actually sees the error text.
                command_output += process.stderr.decode('utf-8')

            new_state = AgentState().new_state()
            new_state["internal_monologue"] = "Running code..."
            new_state["terminal_session"]["title"] = "Terminal"
            new_state["terminal_session"]["command"] = command
            new_state["terminal_session"]["output"] = command_output
            AgentState().add_to_current_state(project_name, new_state)
            time.sleep(1)

            while command_failed and retries < 2:
                new_state = AgentState().new_state()
                new_state["internal_monologue"] = "Oh, seems like there is some error... :("
                new_state["terminal_session"]["title"] = "Terminal"
                new_state["terminal_session"]["command"] = command
                new_state["terminal_session"]["output"] = command_output
                AgentState().add_to_current_state(project_name, new_state)
                time.sleep(1)

                prompt = self.render_rerunner(
                    conversation=conversation,
                    code_markdown=code_markdown,
                    system_os=system_os,
                    commands=commands,
                    error=command_output
                )

                response = self.llm.inference(prompt, project_name)

                valid_response = self.validate_rerunner_response(response)

                if not valid_response:
                    return False

                action = valid_response["action"]

                if action == "command":
                    command = valid_response["command"]
                    response = valid_response["response"]

                    ProjectManager().add_message_from_devika(project_name, response)

                    command_set = command.split(" ")
                    command_failed = False

                    process = subprocess.run(
                        command_set,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        cwd=project_path
                    )
                    command_output = process.stdout.decode('utf-8')
                    command_failed = process.returncode != 0
                    if command_failed:
                        command_output += process.stderr.decode('utf-8')

                    new_state = AgentState().new_state()
                    new_state["internal_monologue"] = "Running code..."
                    new_state["terminal_session"]["title"] = "Terminal"
                    new_state["terminal_session"]["command"] = command
                    new_state["terminal_session"]["output"] = command_output
                    AgentState().add_to_current_state(project_name, new_state)
                    time.sleep(1)

                    if command_failed:
                        retries += 1
                    else:
                        break
                elif action == "patch":
                    response = valid_response["response"]

                    ProjectManager().add_message_from_devika(project_name, response)

                    code = Patcher(base_model=self.base_model).execute(
                        conversation=conversation,
                        code_markdown=code_markdown,
                        commands=commands,
                        error=command_output,
                        system_os=system_os,
                        project_name=project_name
                    )

                    Patcher(base_model=self.base_model).save_code_to_project(code, project_name)

                    command_set = command.split(" ")
                    command_failed = False

                    process = subprocess.run(
                        command_set,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        cwd=project_path
                    )
                    command_output = process.stdout.decode('utf-8')
                    command_failed = process.returncode != 0
                    if command_failed:
                        command_output += process.stderr.decode('utf-8')

                    new_state = AgentState().new_state()
                    new_state["internal_monologue"] = "Running code..."
                    new_state["terminal_session"]["title"] = "Terminal"
                    new_state["terminal_session"]["command"] = command
                    new_state["terminal_session"]["output"] = command_output
                    AgentState().add_to_current_state(project_name, new_state)
                    time.sleep(1)

                    if command_failed:
                        retries += 1
                    else:
                        break

    @retry_wrapper
    def execute(
        self,
        conversation: list,
        code_markdown: str,
        os_system: str,
        project_path: str,
        project_name: str
    ) -> str:
        prompt = self.render(conversation, code_markdown, os_system)
        response = self.llm.inference(prompt, project_name)

        valid_response = self.validate_response(response)

        if not valid_response:
            # Without this guard, run_code would iterate over `False`.
            return False

        self.run_code(
            valid_response,
            project_path,
            project_name,
            conversation,
            code_markdown,
            os_system
        )

        return valid_response
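An end-to-end call sketch (every argument value is an illustrative assumption):

Runner(base_model="gpt-4").execute(
    conversation=["User: run the project"],
    code_markdown="File: `main.py`:\n```py\nprint('hi')\n```",
    os_system="Linux",
    project_path="/path/to/projects/demo-project",
    project_name="demo-project",
)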
62 src/apis/project.py Normal file
@@ -0,0 +1,62 @@
from flask import blueprints, request, jsonify, send_file, make_response
from werkzeug.utils import secure_filename
from src.logger import Logger, route_logger
from src.config import Config
from src.project import ProjectManager
from ..state import AgentState

import os

project_bp = blueprints.Blueprint("project", __name__)

logger = Logger()
manager = ProjectManager()


# Project APIs

@project_bp.route("/api/get-project-files", methods=["GET"])
@route_logger(logger)
def project_files():
    project_name = secure_filename(request.args.get("project_name"))
    files = manager.get_project_files(project_name)
    return jsonify({"files": files})

@project_bp.route("/api/create-project", methods=["POST"])
@route_logger(logger)
def create_project():
    data = request.json
    project_name = data.get("project_name")
    manager.create_project(secure_filename(project_name))
    return jsonify({"message": "Project created"})


@project_bp.route("/api/delete-project", methods=["POST"])
@route_logger(logger)
def delete_project():
    data = request.json
    project_name = secure_filename(data.get("project_name"))
    manager.delete_project(project_name)
    AgentState().delete_state(project_name)
    return jsonify({"message": "Project deleted"})


@project_bp.route("/api/download-project", methods=["GET"])
@route_logger(logger)
def download_project():
    project_name = secure_filename(request.args.get("project_name"))
    manager.project_to_zip(project_name)
    project_path = manager.get_zip_path(project_name)
    return send_file(project_path, as_attachment=False)


@project_bp.route("/api/download-project-pdf", methods=["GET"])
@route_logger(logger)
def download_project_pdf():
    project_name = secure_filename(request.args.get("project_name"))
    pdf_dir = Config().get_pdfs_dir()
    pdf_path = os.path.join(pdf_dir, f"{project_name}.pdf")

    response = make_response(send_file(pdf_path))
    response.headers['Content-Type'] = 'application/pdf'
    return response
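A quick sketch of exercising these endpoints with requests; the host and port are assumptions, not something this file defines:

import requests

BASE = "http://localhost:1337"  # assumed dev address; adjust to your deployment

requests.post(f"{BASE}/api/create-project", json={"project_name": "demo-project"})
files = requests.get(f"{BASE}/api/get-project-files",
                     params={"project_name": "demo-project"}).json()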
17 src/bert/sentence.py Normal file
@@ -0,0 +1,17 @@
from keybert import KeyBERT

class SentenceBert:
    def __init__(self, sentence: str):
        self.sentence = sentence
        self.kw_model = KeyBERT()

    def extract_keywords(self, top_n: int = 5) -> list:
        keywords = self.kw_model.extract_keywords(
            self.sentence,
            keyphrase_ngram_range=(1, 1),
            stop_words='english',
            top_n=top_n,
            use_mmr=True,
            diversity=0.7
        )
        return keywords
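A usage sketch; KeyBERT returns (keyword, score) tuples, and the scores below are made up:

sb = SentenceBert("Create a Flask API that returns weather data for a city")
print(sb.extract_keywords(top_n=3))
# e.g. [('flask', 0.62), ('weather', 0.55), ('api', 0.49)]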
2 src/browser/__init__.py Normal file
@@ -0,0 +1,2 @@
from .browser import Browser
from .interaction import start_interaction
89 src/browser/browser.py Normal file
@@ -0,0 +1,89 @@
import base64
import os

from playwright.async_api import async_playwright, TimeoutError
from markdownify import markdownify as md
from pdfminer.high_level import extract_text
from src.socket_instance import emit_agent
from src.config import Config
from src.state import AgentState


class Browser:
    def __init__(self):
        self.playwright = None
        self.browser = None
        self.page = None
        self.agent = AgentState()

    async def start(self):
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch(headless=True)
        self.page = await self.browser.new_page()
        return self

    # def new_page(self):
    #     return self.browser.new_page()

    async def go_to(self, url):
        try:
            await self.page.goto(url, timeout=20000)
        except TimeoutError as e:
            print(f"TimeoutError: {e} when trying to navigate to {url}")
            return False
        return True

    async def screenshot(self, project_name):
        screenshots_save_path = Config().get_screenshots_dir()

        page_metadata = await self.page.evaluate("() => { return { url: document.location.href, title: document.title } }")
        page_url = page_metadata['url']
        random_filename = os.urandom(20).hex()
        filename_to_save = f"{random_filename}.png"
        path_to_save = os.path.join(screenshots_save_path, filename_to_save)

        await self.page.emulate_media(media="screen")
        await self.page.screenshot(path=path_to_save, full_page=True)
        screenshot = await self.page.screenshot()
        screenshot_bytes = base64.b64encode(screenshot).decode()
        new_state = self.agent.new_state()
        new_state["internal_monologue"] = "Browsing the web right now..."
        new_state["browser_session"]["url"] = page_url
        new_state["browser_session"]["screenshot"] = path_to_save
        self.agent.add_to_current_state(project_name, new_state)
        # self.close()
        return path_to_save, screenshot_bytes

    def get_html(self):
        return self.page.content()

    def get_markdown(self):
        return md(self.page.content())

    def get_pdf(self):
        pdfs_save_path = Config().get_pdfs_dir()

        page_metadata = self.page.evaluate("() => { return { url: document.location.href, title: document.title } }")
        filename_to_save = f"{page_metadata['title']}.pdf"
        save_path = os.path.join(pdfs_save_path, filename_to_save)

        self.page.pdf(path=save_path)

        return save_path

    def pdf_to_text(self, pdf_path):
        return extract_text(pdf_path).strip()

    def get_content(self):
        pdf_path = self.get_pdf()
        return self.pdf_to_text(pdf_path)

    def extract_text(self):
        return self.page.evaluate("() => document.body.innerText")

    async def close(self):
        await self.page.close()
        await self.browser.close()
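A minimal async usage sketch (the URL and project name are illustrative):

import asyncio

async def demo():
    browser = await Browser().start()
    if await browser.go_to("https://example.com"):
        path, _ = await browser.screenshot("demo-project")
        print("saved screenshot to", path)
    await browser.close()

asyncio.run(demo())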
547 src/browser/interaction.py Normal file
@@ -0,0 +1,547 @@
#!/usr/bin/env python3
|
||||
#
|
||||
# natbot.py
|
||||
# https://github.com/nat/natbot
|
||||
#
|
||||
# MODIFIED FOR DEVIKA
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
import os
|
||||
import time
|
||||
from sys import exit, platform
|
||||
|
||||
from src.config import Config
|
||||
from src.state import AgentState
|
||||
from src.llm import LLM
|
||||
|
||||
prompt_template = """
|
||||
You are an agent controlling a browser. You are given:
|
||||
|
||||
(1) an objective that you are trying to achieve
|
||||
(2) the URL of your current web page
|
||||
(3) a simplified text description of what's visible in the browser window (more on that below)
|
||||
|
||||
You can issue these commands:
|
||||
SCROLL UP - scroll up one page
|
||||
SCROLL DOWN - scroll down one page
|
||||
CLICK X - click on a given element. You can only click on links, buttons, and inputs!
|
||||
TYPE X "TEXT" - type the specified text into the input with id X
|
||||
TYPESUBMIT X "TEXT" - same as TYPE above, except then it presses ENTER to submit the form
|
||||
|
||||
The format of the browser content is highly simplified; all formatting elements are stripped.
|
||||
Interactive elements such as links, inputs, buttons are represented like this:
|
||||
|
||||
<link id=1>text</link>
|
||||
<button id=2>text</button>
|
||||
<input id=3>text</input>
|
||||
|
||||
Images are rendered as their alt text like this:
|
||||
|
||||
<img id=4 alt=""/>
|
||||
|
||||
Based on your given objective, issue whatever command you believe will get you closest to achieving your goal.
|
||||
You always start on Google; you should submit a search query to Google that will take you to the best page for
|
||||
achieving your objective. And then interact with that page to achieve your objective.
|
||||
|
||||
If you find yourself on Google and there are no search results displayed yet, you should probably issue a command
|
||||
like "TYPESUBMIT 7 "search query"" to get to a more useful page.
|
||||
|
||||
Then, if you find yourself on a Google search results page, you might issue the command "CLICK 24" to click
|
||||
on the first link in the search results. (If your previous command was a TYPESUBMIT your next command should
|
||||
probably be a CLICK.)
|
||||
|
||||
Don't try to interact with elements that you can't see.
|
||||
|
||||
Here are some examples:
|
||||
|
||||
EXAMPLE 1:
|
||||
==================================================
|
||||
CURRENT BROWSER CONTENT:
|
||||
------------------
|
||||
<link id=1>About</link>
|
||||
<link id=2>Store</link>
|
||||
<link id=3>Gmail</link>
|
||||
<link id=4>Images</link>
|
||||
<link id=5>(Google apps)</link>
|
||||
<link id=6>Sign in</link>
|
||||
<img id=7 alt="(Google)"/>
|
||||
<input id=8 alt="Search"></input>
|
||||
<button id=9>(Search by voice)</button>
|
||||
<button id=10>(Google Search)</button>
|
||||
<button id=11>(I'm Feeling Lucky)</button>
|
||||
<link id=12>Advertising</link>
|
||||
<link id=13>Business</link>
|
||||
<link id=14>How Search works</link>
|
||||
<link id=15>Carbon neutral since 2007</link>
|
||||
<link id=16>Privacy</link>
|
||||
<link id=17>Terms</link>
|
||||
<text id=18>Settings</text>
|
||||
------------------
|
||||
OBJECTIVE: Find a 2 bedroom house for sale in Anchorage AK for under $750k
|
||||
CURRENT URL: https://www.google.com/
|
||||
YOUR COMMAND:
|
||||
TYPESUBMIT 8 "anchorage redfin"
|
||||
==================================================
|
||||
|
||||
EXAMPLE 2:
|
||||
==================================================
|
||||
CURRENT BROWSER CONTENT:
|
||||
------------------
|
||||
<link id=1>About</link>
|
||||
<link id=2>Store</link>
|
||||
<link id=3>Gmail</link>
|
||||
<link id=4>Images</link>
|
||||
<link id=5>(Google apps)</link>
|
||||
<link id=6>Sign in</link>
|
||||
<img id=7 alt="(Google)"/>
|
||||
<input id=8 alt="Search"></input>
|
||||
<button id=9>(Search by voice)</button>
|
||||
<button id=10>(Google Search)</button>
|
||||
<button id=11>(I'm Feeling Lucky)</button>
|
||||
<link id=12>Advertising</link>
|
||||
<link id=13>Business</link>
|
||||
<link id=14>How Search works</link>
|
||||
<link id=15>Carbon neutral since 2007</link>
|
||||
<link id=16>Privacy</link>
|
||||
<link id=17>Terms</link>
|
||||
<text id=18>Settings</text>
|
||||
------------------
|
||||
OBJECTIVE: Make a reservation for 4 at Dorsia at 8pm
|
||||
CURRENT URL: https://www.google.com/
|
||||
YOUR COMMAND:
|
||||
TYPESUBMIT 8 "dorsia nyc opentable"
|
||||
==================================================
|
||||
|
||||
EXAMPLE 3:
|
||||
==================================================
|
||||
CURRENT BROWSER CONTENT:
|
||||
------------------
|
||||
<button id=1>For Businesses</button>
|
||||
<button id=2>Mobile</button>
|
||||
<button id=3>Help</button>
|
||||
<button id=4 alt="Language Picker">EN</button>
|
||||
<link id=5>OpenTable logo</link>
|
||||
<button id=6 alt ="search">Search</button>
|
||||
<text id=7>Find your table for any occasion</text>
|
||||
<button id=8>(Date selector)</button>
|
||||
<text id=9>Sep 28, 2022</text>
|
||||
<text id=10>7:00 PM</text>
|
||||
<text id=11>2 people</text>
|
||||
<input id=12 alt="Location, Restaurant, or Cuisine"></input>
|
||||
<button id=13>Let's go</button>
|
||||
<text id=14>It looks like you're in Peninsula. Not correct?</text>
|
||||
<button id=15>Get current location</button>
|
||||
<button id=16>Next</button>
|
||||
------------------
|
||||
OBJECTIVE: Make a reservation for 4 for dinner at Dorsia in New York City at 8pm
|
||||
CURRENT URL: https://www.opentable.com/
|
||||
YOUR COMMAND:
|
||||
TYPESUBMIT 12 "dorsia new york city"
|
||||
==================================================
|
||||
|
||||
The current browser content, objective, and current URL follow. Reply with your next command to the browser.
|
||||
|
||||
CURRENT BROWSER CONTENT:
|
||||
------------------
|
||||
$browser_content
|
||||
------------------
|
||||
|
||||
OBJECTIVE: $objective
|
||||
CURRENT URL: $url
|
||||
PREVIOUS COMMAND: $previous_command
|
||||
YOUR COMMAND:
|
||||
"""
|
||||
|
||||
black_listed_elements = set(["html", "head", "title", "meta", "iframe", "body", "script", "style", "path", "svg", "br", "::marker",])
|
||||
|
||||
class Crawler:
|
||||
def __init__(self):
|
||||
self.browser = (
|
||||
sync_playwright()
|
||||
.start()
|
||||
.chromium.launch(
|
||||
headless=True,
|
||||
)
|
||||
)
|
||||
|
||||
self.page = self.browser.new_page()
|
||||
self.page.set_viewport_size({"width": 1280, "height": 1080})
|
||||
|
||||
def screenshot(self, project_name):
|
||||
screenshots_save_path = Config().get_screenshots_dir()
|
||||
|
||||
page_metadata = self.page.evaluate("() => { return { url: document.location.href, title: document.title } }")
|
||||
page_url = page_metadata['url']
|
||||
random_filename = os.urandom(20).hex()
|
||||
filename_to_save = f"{random_filename}.png"
|
||||
path_to_save = os.path.join(screenshots_save_path, filename_to_save)
|
||||
|
||||
self.page.emulate_media(media="screen")
|
||||
self.page.screenshot(path=path_to_save)
|
||||
|
||||
new_state = AgentState().new_state()
|
||||
new_state["internal_monologue"] = "Browsing the web right now..."
|
||||
new_state["browser_session"]["url"] = page_url
|
||||
new_state["browser_session"]["screenshot"] = path_to_save
|
||||
AgentState().add_to_current_state(project_name, new_state)
|
||||
|
||||
return path_to_save
|
||||
|
||||
def go_to_page(self, url):
|
||||
self.page.goto(url=url if "://" in url else "http://" + url)
|
||||
self.client = self.page.context.new_cdp_session(self.page)
|
||||
self.page_element_buffer = {}
|
||||
|
||||
def scroll(self, direction):
|
||||
if direction == "up":
|
||||
self.page.evaluate(
|
||||
"(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop - window.innerHeight;"
|
||||
)
|
||||
elif direction == "down":
|
||||
self.page.evaluate(
|
||||
"(document.scrollingElement || document.body).scrollTop = (document.scrollingElement || document.body).scrollTop + window.innerHeight;"
|
||||
)
|
||||
|
||||
def click(self, id):
|
||||
# Inject javascript into the page which removes the target= attribute from all links
|
||||
js = """
|
||||
links = document.getElementsByTagName("a");
|
||||
for (var i = 0; i < links.length; i++) {
|
||||
links[i].removeAttribute("target");
|
||||
}
|
||||
"""
|
||||
self.page.evaluate(js)
|
||||
|
||||
element = self.page_element_buffer.get(int(id))
|
||||
if element:
|
||||
x = element.get("center_x")
|
||||
y = element.get("center_y")
|
||||
|
||||
self.page.mouse.click(x, y)
|
||||
else:
|
||||
print("Could not find element")
|
||||
|
||||
def type(self, id, text):
|
||||
self.click(id)
|
||||
self.page.keyboard.type(text)
|
||||
|
||||
def enter(self):
|
||||
self.page.keyboard.press("Enter")
|
||||
|
||||
def crawl(self):
|
||||
page = self.page
|
||||
page_element_buffer = self.page_element_buffer
|
||||
start = time.time()
|
||||
|
||||
page_state_as_text = []
|
||||
|
||||
device_pixel_ratio = page.evaluate("window.devicePixelRatio")
|
||||
if platform == "darwin" and device_pixel_ratio == 1: # lies
|
||||
device_pixel_ratio = 2
|
||||
|
||||
win_scroll_x = page.evaluate("window.scrollX")
|
||||
win_scroll_y = page.evaluate("window.scrollY")
|
||||
win_upper_bound = page.evaluate("window.pageYOffset")
|
||||
win_left_bound = page.evaluate("window.pageXOffset")
|
||||
win_width = page.evaluate("window.screen.width")
|
||||
win_height = page.evaluate("window.screen.height")
|
||||
win_right_bound = win_left_bound + win_width
|
||||
win_lower_bound = win_upper_bound + win_height
|
||||
document_offset_height = page.evaluate("document.body.offsetHeight")
|
||||
document_scroll_height = page.evaluate("document.body.scrollHeight")
|
||||
|
||||
# Removed unused percentage_progress variables
|
||||
|
||||
tree = self.client.send(
|
||||
"DOMSnapshot.captureSnapshot",
|
||||
{"computedStyles": [], "includeDOMRects": True, "includePaintOrder": True},
|
||||
)
|
||||
strings = tree["strings"]
|
||||
document = tree["documents"][0]
|
||||
nodes = document["nodes"]
|
||||
backend_node_id = nodes["backendNodeId"]
|
||||
attributes = nodes["attributes"]
|
||||
node_value = nodes["nodeValue"]
|
||||
parent = nodes["parentIndex"]
|
||||
node_types = nodes["nodeType"]
|
||||
node_names = nodes["nodeName"]
|
||||
is_clickable = set(nodes["isClickable"]["index"])
|
||||
|
||||
text_value = nodes["textValue"]
|
||||
text_value_index = text_value["index"]
|
||||
text_value_values = text_value["value"]
|
||||
|
||||
input_value = nodes["inputValue"]
|
||||
input_value_index = input_value["index"]
|
||||
input_value_values = input_value["value"]
|
||||
|
||||
input_checked = nodes["inputChecked"]
|
||||
layout = document["layout"]
|
||||
layout_node_index = layout["nodeIndex"]
|
||||
bounds = layout["bounds"]
|
||||
|
||||
cursor = 0
|
||||
html_elements_text = []
|
||||
|
||||
child_nodes = {}
|
||||
elements_in_view_port = []
|
||||
|
||||
# Refactored to use dict.setdefault() for cleaner logic
|
||||
ancestor_exceptions = {
|
||||
"a": {"ancestry": {"-1": (False, None)}, "nodes": {}},
|
||||
"button": {"ancestry": {"-1": (False, None)}, "nodes": {}},
|
||||
}
|
||||
|
||||
def convert_name(node_name, is_clickable):
|
||||
if node_name == "a":
|
||||
return "link"
|
||||
if node_name == "input":
|
||||
return "input"
|
||||
if node_name == "img":
|
||||
return "img"
|
||||
if node_name == "button" or is_clickable:
|
||||
return "button"
|
||||
return "text"
|
||||
|
||||
def find_attributes(attributes, keys):
|
||||
values = {}
|
||||
for [key_index, value_index] in zip(*(iter(attributes),) * 2):
|
||||
if value_index < 0:
|
||||
continue
|
||||
key = strings[key_index]
|
||||
value = strings[value_index]
|
||||
if key in keys:
|
||||
values[key] = value
|
||||
keys.remove(key)
|
||||
if not keys:
|
||||
return values
|
||||
return values
|
||||
|
||||
def add_to_hash_tree(hash_tree, tag, node_id, node_name, parent_id):
|
||||
parent_id_str = str(parent_id)
|
||||
if parent_id_str not in hash_tree:
|
||||
parent_name = strings[node_names[parent_id]].lower()
|
||||
grand_parent_id = parent[parent_id]
|
||||
add_to_hash_tree(hash_tree, tag, parent_id, parent_name, grand_parent_id)
|
||||
is_parent_desc_anchor, anchor_id = hash_tree[parent_id_str]
|
||||
value = (True, node_id) if node_name == tag else (True, anchor_id) if is_parent_desc_anchor else (False, None)
|
||||
hash_tree[str(node_id)] = value
|
||||
return value
|
||||
|
||||
for index, node_name_index in enumerate(node_names):
|
||||
node_parent = parent[index]
|
||||
node_name = strings[node_name_index].lower()
|
||||
|
||||
# Refactored to use dict to store exceptions
|
||||
for tag in ancestor_exceptions:
|
||||
is_ancestor_of_tag, tag_id = add_to_hash_tree(ancestor_exceptions[tag]["ancestry"], tag, index, node_name, node_parent)
|
||||
ancestor_exceptions[tag]["nodes"][str(index)] = (is_ancestor_of_tag, tag_id)
|
||||
|
||||
try:
|
||||
cursor = layout_node_index.index(index)
|
||||
except:
|
||||
continue
|
||||
|
||||
if node_name in black_listed_elements:
|
||||
continue
|
||||
|
||||
[x, y, width, height] = bounds[cursor]
|
||||
x /= device_pixel_ratio
|
||||
y /= device_pixel_ratio
|
||||
width /= device_pixel_ratio
|
||||
height /= device_pixel_ratio
|
||||
|
||||
elem_left_bound = x
|
||||
elem_top_bound = y
|
||||
elem_right_bound = x + width
|
||||
elem_lower_bound = y + height
|
||||
|
||||
partially_is_in_viewport = (
|
||||
elem_left_bound < win_right_bound
|
||||
and elem_right_bound >= win_left_bound
|
||||
and elem_top_bound < win_lower_bound
|
||||
and elem_lower_bound >= win_upper_bound
|
||||
)
|
||||
|
||||
if not partially_is_in_viewport:
|
||||
continue
|
||||
|
||||
meta_data = []
|
||||
|
||||
# Refactored to use dict to store and access attributes
|
||||
element_attributes = find_attributes(
|
||||
attributes[index], ["type", "placeholder", "aria-label", "title", "alt"]
|
||||
)
|
||||
|
||||
ancestor_exception = {
|
||||
tag: ancestor_exceptions[tag]["nodes"].get(str(index), (False, None))
|
||||
for tag in ancestor_exceptions
|
||||
}
|
||||
|
||||
is_ancestor_of_anchor, anchor_id = ancestor_exception.get("a", (False, None))
|
||||
is_ancestor_of_button, button_id = ancestor_exception.get("button", (False, None))
|
||||
ancestor_node_key = (
|
||||
str(anchor_id) if is_ancestor_of_anchor else str(button_id) if is_ancestor_of_button else None
|
||||
)
|
||||
ancestor_node = (
|
||||
child_nodes.setdefault(str(ancestor_node_key), [])
|
||||
if is_ancestor_of_anchor or is_ancestor_of_button
|
||||
else None
|
||||
)
|
||||
|
||||
if node_name == "#text" and ancestor_node is not None:
|
||||
text = strings[node_value[index]]
|
||||
if text in ["•", "|"]:
|
||||
continue
|
||||
ancestor_node.append({"type": "text", "value": text})
|
||||
else:
|
||||
if (node_name == "input" and element_attributes.get("type") == "submit") or node_name == "button":
|
||||
node_name = "button"
|
||||
element_attributes.pop("type", None)
|
||||
|
||||
for key, value in element_attributes.items():
|
||||
if ancestor_node is not None:
|
||||
ancestor_node.append({"type": "attribute", "key": key, "value": value})
|
||||
else:
|
||||
meta_data.append(value)
|
||||
|
||||
element_node_value = None
|
||||
if node_value[index] >= 0:
|
||||
element_node_value = strings[node_value[index]]
|
||||
if element_node_value == "|":
|
||||
continue
|
||||
elif node_name == "input" and index in input_value_index:
|
||||
input_text_index = input_value_index.index(index)
|
||||
text_index = input_value_values[input_text_index]
|
||||
if text_index >= 0:
|
||||
element_node_value = strings[text_index]
|
||||
|
||||
if (is_ancestor_of_anchor or is_ancestor_of_button) and (node_name != "a" and node_name != "button"):
|
||||
continue
|
||||
|
||||
elements_in_view_port.append({
|
||||
"node_index": str(index),
|
||||
"backend_node_id": backend_node_id[index],
|
||||
"node_name": node_name,
|
||||
"node_value": element_node_value,
|
||||
"node_meta": meta_data,
|
||||
"is_clickable": index in is_clickable,
|
||||
"origin_x": int(x),
|
||||
"origin_y": int(y),
|
||||
"center_x": int(x + (width / 2)),
|
||||
"center_y": int(y + (height / 2)),
|
||||
})
|
||||
|
||||
elements_of_interest = []
|
||||
id_counter = 0
|
||||
|
||||
for element in elements_in_view_port:
|
||||
node_index = element["node_index"]
|
||||
node_name = element["node_name"]
|
||||
node_value = element["node_value"]
|
||||
is_clickable = element["is_clickable"]
|
||||
meta_data = element["node_meta"]
|
||||
|
||||
inner_text = f"{node_value} " if node_value else ""
|
||||
meta = ""
|
||||
|
||||
if node_index in child_nodes:
|
||||
for child in child_nodes[node_index]:
|
||||
entry_type = child["type"]
|
||||
entry_value = child["value"]
|
||||
if entry_type == "attribute":
|
||||
entry_key = child["key"]
|
||||
meta_data.append(f'{entry_key}="{entry_value}"')
|
||||
else:
|
||||
inner_text += f"{entry_value} "
|
||||
|
||||
if meta_data:
|
||||
meta = f' {" ".join(meta_data)}'
|
||||
inner_text = inner_text.strip()
|
||||
|
||||
# Refactored to use descriptive variable names
|
||||
should_include_element = (
|
||||
inner_text != "" or
|
||||
node_name in ["link", "input", "img", "button", "textarea"] or
|
||||
(node_name == "button" and meta != "")
|
||||
)
|
||||
if not should_include_element:
|
||||
continue
|
||||
|
||||
page_element_buffer[id_counter] = element
|
||||
|
||||
element_string = f'<{convert_name(node_name, is_clickable)} id={id_counter}{meta}>'
|
||||
if inner_text:
|
||||
element_string += f'{inner_text}</{convert_name(node_name, is_clickable)}>'
|
||||
else:
|
||||
element_string += '/>'
|
||||
elements_of_interest.append(element_string)
|
||||
|
||||
id_counter += 1
|
||||
|
||||
print(f'Parsing time: {time.time() - start:.2f} seconds')
|
||||
return elements_of_interest
|
||||
|
||||
def start_interaction(model_id, objective, project_name):
|
||||
_crawler = Crawler()
|
||||
|
||||
def print_help():
|
||||
print(
|
||||
"(g) to visit url\n(u) scroll up\n(d) scroll down\n(c) to click\n(t) to type\n" +
|
||||
"(h) to view commands again\n(r/enter) to run suggested command\n(o) change objective"
|
||||
)
|
||||
|
||||
def get_gpt_command(objective, url, previous_command, browser_content):
|
||||
prompt = prompt_template
|
||||
prompt = prompt.replace("$objective", objective)
|
||||
prompt = prompt.replace("$url", url[:100])
|
||||
prompt = prompt.replace("$previous_command", previous_command)
|
||||
prompt = prompt.replace("$browser_content", browser_content[:4500])
|
||||
response = LLM(model_id=model_id).inference(prompt)
|
||||
return response
|
||||
|
||||
def run_cmd(cmd):
|
||||
cmd = cmd.split("\n")[0]
|
||||
|
||||
if cmd.startswith("SCROLL UP"):
|
||||
_crawler.scroll("up")
|
||||
elif cmd.startswith("SCROLL DOWN"):
|
||||
_crawler.scroll("down")
|
||||
elif cmd.startswith("CLICK"):
|
||||
commasplit = cmd.split(",")
|
||||
id = commasplit[0].split(" ")[1]
|
||||
_crawler.click(id)
|
||||
elif cmd.startswith("TYPE"):
|
||||
spacesplit = cmd.split(" ")
|
||||
id = spacesplit[1]
|
||||
text = " ".join(spacesplit[2:])
|
||||
text = text[1:-1]
|
||||
if cmd.startswith("TYPESUBMIT"):
|
||||
text += '\n'
|
||||
_crawler.type(id, text)
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
gpt_cmd = ""
|
||||
prev_cmd = ""
|
||||
_crawler.go_to_page("google.com")
|
||||
|
||||
try:
|
||||
visits = 0
|
||||
|
||||
while visits < 5:
|
||||
browser_content = "\n".join(_crawler.crawl())
|
||||
prev_cmd = gpt_cmd
|
||||
|
||||
current_url = _crawler.page.url
|
||||
|
||||
_crawler.screenshot(project_name)
|
||||
|
||||
gpt_cmd = get_gpt_command(objective, current_url, prev_cmd, browser_content).strip()
|
||||
run_cmd(gpt_cmd)
|
||||
|
||||
visits += 1
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\n[!] Ctrl+C detected, exiting gracefully.")
|
||||
exit(0)
|
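For context, the loop above feeds the LLM a flattened textual view of the page built by the parsing code earlier in this file. A hedged sketch of what that exchange might look like (the element strings and the command are illustrative, not captured output):

```python
# Hypothetical snapshot of "\n".join(_crawler.crawl()) for a search page;
# the numeric ids index into page_element_buffer, so the model's CLICK/TYPE
# commands can be resolved back to concrete elements.
browser_content = "\n".join([
    '<link id=0>Gmail</link>',
    '<input id=1 title="Search"/>',
    '<button id=2>Google Search</button>',
])
# The model replies with a single command line, e.g.:
#   TYPESUBMIT 1 "devika agent github"
# run_cmd() then splits that line and forwards it to the Crawler.
```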
167
src/browser/search.py
Normal file
|
@@ -0,0 +1,167 @@
|
|||
import requests
|
||||
from src.config import Config
|
||||
|
||||
import re
|
||||
from urllib.parse import unquote
|
||||
from html import unescape
|
||||
import orjson
|
||||
|
||||
|
||||
class BingSearch:
|
||||
def __init__(self):
|
||||
self.config = Config()
|
||||
self.bing_api_key = self.config.get_bing_api_key()
|
||||
self.bing_api_endpoint = self.config.get_bing_api_endpoint()
|
||||
self.query_result = None
|
||||
|
||||
def search(self, query):
|
||||
headers = {"Ocp-Apim-Subscription-Key": self.bing_api_key}
|
||||
params = {"q": query, "mkt": "en-US"}
|
||||
|
||||
try:
|
||||
response = requests.get(self.bing_api_endpoint, headers=headers, params=params)
|
||||
response.raise_for_status()
|
||||
self.query_result = response.json()
|
||||
return self.query_result
|
||||
except Exception as error:
|
||||
return error
|
||||
|
||||
def get_first_link(self):
|
||||
return self.query_result["webPages"]["value"][0]["url"]
|
||||
|
||||
|
||||
class GoogleSearch:
|
||||
def __init__(self):
|
||||
self.config = Config()
|
||||
self.google_search_api_key = self.config.get_google_search_api_key()
|
||||
self.google_search_engine_ID = self.config.get_google_search_engine_id()
|
||||
self.google_search_api_endpoint = self.config.get_google_search_api_endpoint()
|
||||
self.query_result = None
|
||||
|
||||
def search(self, query):
|
||||
params = {
|
||||
"key": self.google_search_api_key,
|
||||
"cx": self.google_search_engine_ID,
|
||||
"q": query
|
||||
}
|
||||
try:
|
||||
print("Searching in Google...")
|
||||
response = requests.get(self.google_search_api_endpoint, params=params)
|
||||
# response.raise_for_status()
|
||||
self.query_result = response.json()
|
||||
except Exception as error:
|
||||
return error
|
||||
|
||||
def get_first_link(self):
|
||||
item = ""
|
||||
try:
|
||||
if 'items' in self.query_result:
|
||||
item = self.query_result['items'][0]['link']
|
||||
return item
|
||||
except Exception as error:
|
||||
print(error)
|
||||
return ""
|
||||
|
||||
# class DuckDuckGoSearch:
|
||||
# def __init__(self):
|
||||
# self.query_result = None
|
||||
#
|
||||
# def search(self, query):
|
||||
# from duckduckgo_search import DDGS
|
||||
# try:
|
||||
# self.query_result = DDGS().text(query, max_results=5, region="us")
|
||||
# print(self.query_result)
|
||||
#
|
||||
# except Exception as err:
|
||||
# print(err)
|
||||
#
|
||||
# def get_first_link(self):
|
||||
# if self.query_result:
|
||||
# return self.query_result[0]["href"]
|
||||
# else:
|
||||
# return None
|
||||
#
|
||||
|
||||
|
||||
class DuckDuckGoSearch:
|
||||
"""DuckDuckGo search engine class.
|
||||
Methods are adapted from the duckduckgo_search package;
|
||||
do not change them.
|
||||
|
||||
Currently, the upstream package does not work with our setup, hence this standalone implementation.
|
||||
"""
|
||||
def __init__(self):
|
||||
from curl_cffi import requests as curl_requests
|
||||
self.query_result = None
|
||||
self.asession = curl_requests.Session(impersonate="chrome", allow_redirects=False)
|
||||
self.asession.headers["Referer"] = "https://duckduckgo.com/"
|
||||
|
||||
def _get_url(self, method, url, data):
|
||||
try:
|
||||
resp = self.asession.request(method, url, data=data)
|
||||
if resp.status_code == 200:
|
||||
return resp.content
|
||||
if resp.status_code in (202, 301, 403):
|
||||
raise Exception(f"Error: {resp.status_code} rate limit error")
|
||||
if not resp:
|
||||
return None
|
||||
except Exception as error:
|
||||
if "timeout" in str(error).lower():
|
||||
raise TimeoutError("DuckDuckGo request timed out")
|
||||
|
||||
def duck(self, query):
|
||||
resp = self._get_url("POST", "https://duckduckgo.com/", data={"q": query})
|
||||
vqd = self.extract_vqd(resp)
|
||||
|
||||
params = {"q": query, "kl": 'en-us', "p": "1", "s": "0", "df": "", "vqd": vqd, "ex": ""}
|
||||
resp = self._get_url("GET", "https://links.duckduckgo.com/d.js", params)
|
||||
page_data = self.text_extract_json(resp)
|
||||
|
||||
results = []
|
||||
for row in page_data:
|
||||
href = row.get("u")
|
||||
if href and href != f"http://www.google.com/search?q={query}":
|
||||
body = self.normalize(row["a"])
|
||||
if body:
|
||||
result = {
|
||||
"title": self.normalize(row["t"]),
|
||||
"href": self.normalize_url(href),
|
||||
"body": self.normalize(row["a"]),
|
||||
}
|
||||
results.append(result)
|
||||
|
||||
self.query_result = results
|
||||
|
||||
def search(self, query):
|
||||
self.duck(query)
|
||||
|
||||
def get_first_link(self):
|
||||
return self.query_result[0]["href"]
|
||||
|
||||
@staticmethod
|
||||
def extract_vqd(html_bytes: bytes) -> str:
|
||||
patterns = [(b'vqd="', 5, b'"'), (b"vqd=", 4, b"&"), (b"vqd='", 5, b"'")]
|
||||
for start_pattern, offset, end_pattern in patterns:
|
||||
try:
|
||||
start = html_bytes.index(start_pattern) + offset
|
||||
end = html_bytes.index(end_pattern, start)
|
||||
return html_bytes[start:end].decode()
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
@staticmethod
|
||||
def text_extract_json(html_bytes):
|
||||
try:
|
||||
start = html_bytes.index(b"DDG.pageLayout.load('d',") + 24
|
||||
end = html_bytes.index(b");DDG.duckbar.load(", start)
|
||||
return orjson.loads(html_bytes[start:end])
|
||||
except Exception as ex:
|
||||
print(f"Error extracting JSON: {type(ex).__name__}: {ex}")
|
||||
|
||||
@staticmethod
|
||||
def normalize_url(url: str) -> str:
|
||||
return unquote(url.replace(" ", "+")) if url else ""
|
||||
|
||||
@staticmethod
|
||||
def normalize(raw_html: str) -> str:
|
||||
return unescape(re.sub("<.*?>", "", raw_html)) if raw_html else ""
|
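All three engine classes above share the same two-step contract: call search(query), then get_first_link(). A minimal usage sketch, assuming the relevant keys and endpoints are already present in config.toml (error handling varies per engine):

```python
# Hedged example; the query string is illustrative.
from src.browser.search import BingSearch, GoogleSearch, DuckDuckGoSearch

engine = GoogleSearch()  # or BingSearch() / DuckDuckGoSearch()
engine.search("devika AI software engineer")
print(engine.get_first_link())  # URL of the first result
```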
188
src/config.py
Normal file
|
@@ -0,0 +1,188 @@
|
|||
import toml
|
||||
import os
|
||||
|
||||
|
||||
class Config:
|
||||
_instance = None
|
||||
|
||||
def __new__(cls):
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
cls._instance._load_config()
|
||||
return cls._instance
|
||||
|
||||
def _load_config(self):
|
||||
# If the config file doesn't exist, copy from the sample
|
||||
if not os.path.exists("config.toml"):
|
||||
with open("sample.config.toml", "r") as f_in, open("config.toml", "w+") as f_out:
|
||||
f_out.write(f_in.read())
|
||||
f_out.seek(0)
|
||||
self.config = toml.load(f_out)
|
||||
else:
|
||||
# check if all the keys are present in the config file
|
||||
with open("sample.config.toml", "r") as f:
|
||||
sample_config = toml.load(f)
|
||||
|
||||
with open("config.toml", "r+") as f:
|
||||
config = toml.load(f)
|
||||
|
||||
# Update the config with any missing top-level keys and their nested sub-keys
|
||||
for key, value in sample_config.items():
|
||||
config.setdefault(key, value)
|
||||
if isinstance(value, dict):
|
||||
for sub_key, sub_value in value.items():
|
||||
config[key].setdefault(sub_key, sub_value)
|
||||
|
||||
f.seek(0)
|
||||
toml.dump(config, f)
|
||||
f.truncate()
|
||||
|
||||
self.config = config
|
||||
|
||||
def get_config(self):
|
||||
return self.config
|
||||
|
||||
def get_bing_api_endpoint(self):
|
||||
return self.config["API_ENDPOINTS"]["BING"]
|
||||
|
||||
def get_bing_api_key(self):
|
||||
return self.config["API_KEYS"]["BING"]
|
||||
|
||||
def get_google_search_api_key(self):
|
||||
return self.config["API_KEYS"]["GOOGLE_SEARCH"]
|
||||
|
||||
def get_google_search_engine_id(self):
|
||||
return self.config["API_KEYS"]["GOOGLE_SEARCH_ENGINE_ID"]
|
||||
|
||||
def get_google_search_api_endpoint(self):
|
||||
return self.config["API_ENDPOINTS"]["GOOGLE"]
|
||||
|
||||
def get_ollama_api_endpoint(self):
|
||||
return self.config["API_ENDPOINTS"]["OLLAMA"]
|
||||
|
||||
def get_claude_api_key(self):
|
||||
return self.config["API_KEYS"]["CLAUDE"]
|
||||
|
||||
def get_openai_api_key(self):
|
||||
return self.config["API_KEYS"]["OPENAI"]
|
||||
|
||||
def get_openai_api_base_url(self):
|
||||
return self.config["API_ENDPOINTS"]["OPENAI"]
|
||||
|
||||
def get_gemini_api_key(self):
|
||||
return self.config["API_KEYS"]["GEMINI"]
|
||||
|
||||
def get_mistral_api_key(self):
|
||||
return self.config["API_KEYS"]["MISTRAL"]
|
||||
|
||||
def get_groq_api_key(self):
|
||||
return self.config["API_KEYS"]["GROQ"]
|
||||
|
||||
def get_netlify_api_key(self):
|
||||
return self.config["API_KEYS"]["NETLIFY"]
|
||||
|
||||
def get_sqlite_db(self):
|
||||
return self.config["STORAGE"]["SQLITE_DB"]
|
||||
|
||||
def get_screenshots_dir(self):
|
||||
return self.config["STORAGE"]["SCREENSHOTS_DIR"]
|
||||
|
||||
def get_pdfs_dir(self):
|
||||
return self.config["STORAGE"]["PDFS_DIR"]
|
||||
|
||||
def get_projects_dir(self):
|
||||
return self.config["STORAGE"]["PROJECTS_DIR"]
|
||||
|
||||
def get_logs_dir(self):
|
||||
return self.config["STORAGE"]["LOGS_DIR"]
|
||||
|
||||
def get_repos_dir(self):
|
||||
return self.config["STORAGE"]["REPOS_DIR"]
|
||||
|
||||
def get_logging_rest_api(self):
|
||||
return self.config["LOGGING"]["LOG_REST_API"] == "true"
|
||||
|
||||
def get_logging_prompts(self):
|
||||
return self.config["LOGGING"]["LOG_PROMPTS"] == "true"
|
||||
|
||||
def get_timeout_inference(self):
|
||||
return self.config["TIMEOUT"]["INFERENCE"]
|
||||
|
||||
def set_bing_api_key(self, key):
|
||||
self.config["API_KEYS"]["BING"] = key
|
||||
self.save_config()
|
||||
|
||||
def set_bing_api_endpoint(self, endpoint):
|
||||
self.config["API_ENDPOINTS"]["BING"] = endpoint
|
||||
self.save_config()
|
||||
|
||||
def set_google_search_api_key(self, key):
|
||||
self.config["API_KEYS"]["GOOGLE_SEARCH"] = key
|
||||
self.save_config()
|
||||
|
||||
def set_google_search_engine_id(self, key):
|
||||
self.config["API_KEYS"]["GOOGLE_SEARCH_ENGINE_ID"] = key
|
||||
self.save_config()
|
||||
|
||||
def set_google_search_api_endpoint(self, endpoint):
|
||||
self.config["API_ENDPOINTS"]["GOOGLE_SEARCH"] = endpoint
|
||||
self.save_config()
|
||||
|
||||
def set_ollama_api_endpoint(self, endpoint):
|
||||
self.config["API_ENDPOINTS"]["OLLAMA"] = endpoint
|
||||
self.save_config()
|
||||
|
||||
def set_claude_api_key(self, key):
|
||||
self.config["API_KEYS"]["CLAUDE"] = key
|
||||
self.save_config()
|
||||
|
||||
def set_openai_api_key(self, key):
|
||||
self.config["API_KEYS"]["OPENAI"] = key
|
||||
self.save_config()
|
||||
|
||||
def set_openai_api_endpoint(self, endpoint):
|
||||
self.config["API_ENDPOINTS"]["OPENAI"] = endpoint
|
||||
self.save_config()
|
||||
|
||||
def set_gemini_api_key(self, key):
|
||||
self.config["API_KEYS"]["GEMINI"] = key
|
||||
self.save_config()
|
||||
|
||||
def set_mistral_api_key(self, key):
|
||||
self.config["API_KEYS"]["MISTRAL"] = key
|
||||
self.save_config()
|
||||
|
||||
def set_groq_api_key(self, key):
|
||||
self.config["API_KEYS"]["GROQ"] = key
|
||||
self.save_config()
|
||||
|
||||
def set_netlify_api_key(self, key):
|
||||
self.config["API_KEYS"]["NETLIFY"] = key
|
||||
self.save_config()
|
||||
|
||||
def set_logging_rest_api(self, value):
|
||||
self.config["LOGGING"]["LOG_REST_API"] = "true" if value else "false"
|
||||
self.save_config()
|
||||
|
||||
def set_logging_prompts(self, value):
|
||||
self.config["LOGGING"]["LOG_PROMPTS"] = "true" if value else "false"
|
||||
self.save_config()
|
||||
|
||||
def set_timeout_inference(self, value):
|
||||
self.config["TIMEOUT"]["INFERENCE"] = value
|
||||
self.save_config()
|
||||
|
||||
def save_config(self):
|
||||
with open("config.toml", "w") as f:
|
||||
toml.dump(self.config, f)
|
||||
|
||||
def update_config(self, data):
|
||||
for key, value in data.items():
|
||||
if key in self.config:
|
||||
with open("config.toml", "r+") as f:
|
||||
config = toml.load(f)
|
||||
for sub_key, sub_value in value.items():
|
||||
self.config[key][sub_key] = sub_value
|
||||
config[key][sub_key] = sub_value
|
||||
f.seek(0)
|
||||
toml.dump(config, f)
|
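The accessors above imply the shape of config.toml. A hedged sketch of the dict that toml.load("config.toml") is expected to return, with section and key names inferred from the getters (values are placeholders):

```python
# Assumed structure only; consult sample.config.toml for the authoritative layout.
config = {
    "API_KEYS": {
        "BING": "...", "GOOGLE_SEARCH": "...", "GOOGLE_SEARCH_ENGINE_ID": "...",
        "CLAUDE": "...", "OPENAI": "...", "GEMINI": "...", "MISTRAL": "...",
        "GROQ": "...", "NETLIFY": "...",
    },
    "API_ENDPOINTS": {"BING": "...", "GOOGLE": "...", "OLLAMA": "...", "OPENAI": "..."},
    "STORAGE": {
        "SQLITE_DB": "...", "SCREENSHOTS_DIR": "...", "PDFS_DIR": "...",
        "PROJECTS_DIR": "...", "LOGS_DIR": "...", "REPOS_DIR": "...",
    },
    "LOGGING": {"LOG_REST_API": "true", "LOG_PROMPTS": "false"},
    "TIMEOUT": {"INFERENCE": 60},
}
```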
0
src/documenter/graphwiz.py
Normal file
23
src/documenter/pdf.py
Normal file
|
@@ -0,0 +1,23 @@
|
|||
import os
|
||||
from io import BytesIO
|
||||
from markdown import markdown
|
||||
from xhtml2pdf import pisa
|
||||
|
||||
from src.config import Config
|
||||
|
||||
class PDF:
|
||||
def __init__(self):
|
||||
config = Config()
|
||||
self.pdf_path = config.get_pdfs_dir()
|
||||
|
||||
def markdown_to_pdf(self, markdown_string, project_name):
|
||||
html_string = markdown(markdown_string)
|
||||
|
||||
out_file_path = os.path.join(self.pdf_path, f"{project_name}.pdf")
|
||||
with open(out_file_path, "wb") as out_file:
|
||||
pisa_status = pisa.CreatePDF(html_string, dest=out_file)
|
||||
|
||||
if pisa_status.err:
|
||||
raise Exception("Error generating PDF")
|
||||
|
||||
return out_file_path
|
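A minimal usage sketch for the PDF documenter; the markdown string and project name are illustrative, and PDFS_DIR is assumed to exist (init_devika() creates it at startup):

```python
from src.documenter.pdf import PDF

path = PDF().markdown_to_pdf("# Report\n\nGenerated by Devika.", "example-project")
print(path)  # <PDFS_DIR>/example-project.pdf
```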
0
src/documenter/uml.py
Normal file
0
src/experts/__UNIMPLEMENTED__
Normal file
11
src/experts/chemistry.py
Normal file
|
@@ -0,0 +1,11 @@
|
|||
"""
|
||||
Function calls and Parser for:
|
||||
- SMILES Notation
|
||||
- Molecule Parser
|
||||
|
||||
Visualization for:
|
||||
- Molecule Structure
|
||||
- Molecule Properties
|
||||
|
||||
Use RDKit bindings
|
||||
"""
|
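Since the stub above only names the plan, here is a hedged sketch of what the RDKit-backed SMILES parsing could look like; the RDKit calls are the library's real API, but the wrapper function is hypothetical:

```python
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors

def parse_smiles(smiles: str) -> dict:
    """Hypothetical helper: parse a SMILES string and report basic properties."""
    mol = Chem.MolFromSmiles(smiles)  # returns None for invalid SMILES
    if mol is None:
        raise ValueError(f"Invalid SMILES: {smiles}")
    return {
        "formula": rdMolDescriptors.CalcMolFormula(mol),
        "mol_weight": Descriptors.MolWt(mol),
    }

print(parse_smiles("CCO"))  # ethanol
```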
3
src/experts/game-dev.py
Normal file
|
@@ -0,0 +1,3 @@
|
|||
"""
|
||||
RAG for Unity/Godot/Unreal Engine code blocks
|
||||
"""
|
4
src/experts/math.py
Normal file
|
@@ -0,0 +1,4 @@
|
|||
"""
|
||||
Evaluator Function Calling
|
||||
Wolfram Alpha Plugin
|
||||
"""
|
3
src/experts/medical.py
Normal file
|
@@ -0,0 +1,3 @@
|
|||
"""
|
||||
PubMed archive RAG
|
||||
"""
|
3
src/experts/physics.py
Normal file
|
@@ -0,0 +1,3 @@
|
|||
"""
|
||||
Physics Function Calls
|
||||
"""
|
3
src/experts/stackoverflow.py
Normal file
|
@@ -0,0 +1,3 @@
|
|||
"""
|
||||
Stack Overflow query search and retrieval
|
||||
"""
|
3
src/experts/web-design.py
Normal file
|
@@ -0,0 +1,3 @@
|
|||
"""
|
||||
Tailwind UI Components code snippets RAG
|
||||
"""
|
1
src/filesystem/__init__.py
Normal file
|
@@ -0,0 +1 @@
|
|||
from .read_code import ReadCode
|
35
src/filesystem/read_code.py
Normal file
|
@@ -0,0 +1,35 @@
|
|||
import os
|
||||
|
||||
from src.config import Config
|
||||
|
||||
"""
|
||||
TODO: Replace this with `code2prompt` - https://github.com/mufeedvh/code2prompt
|
||||
"""
|
||||
|
||||
class ReadCode:
|
||||
def __init__(self, project_name: str):
|
||||
config = Config()
|
||||
project_path = config.get_projects_dir()
|
||||
self.directory_path = os.path.join(project_path, project_name.lower().replace(" ", "-"))
|
||||
|
||||
def read_directory(self):
|
||||
files_list = []
|
||||
for root, _dirs, files in os.walk(self.directory_path):
|
||||
for file in files:
|
||||
try:
|
||||
file_path = os.path.join(root, file)
|
||||
with open(file_path, 'r') as file_content:
|
||||
files_list.append({"filename": file_path, "code": file_content.read()})
|
||||
except (UnicodeDecodeError, OSError):  # skip binary or unreadable files
|
||||
pass
|
||||
|
||||
return files_list
|
||||
|
||||
def code_set_to_markdown(self):
|
||||
code_set = self.read_directory()
|
||||
markdown = ""
|
||||
for code in code_set:
|
||||
markdown += f"### {code['filename']}:\n\n"
|
||||
markdown += f"```\n{code['code']}\n```\n\n"
|
||||
markdown += "---\n\n"
|
||||
return markdown
|
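A minimal usage sketch; the project name is hypothetical and resolves to <PROJECTS_DIR>/my-project per the constructor's lower-and-hyphenate rule:

```python
from src.filesystem import ReadCode

markdown = ReadCode("My Project").code_set_to_markdown()
print(markdown)  # one fenced code block per file, separated by "---"
```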
32
src/init.py
Normal file
|
@@ -0,0 +1,32 @@
|
|||
import os
|
||||
from src.config import Config
|
||||
from src.logger import Logger
|
||||
|
||||
|
||||
def init_devika():
|
||||
logger = Logger()
|
||||
|
||||
logger.info("Initializing Devika...")
|
||||
logger.info("checking configurations...")
|
||||
|
||||
config = Config()
|
||||
|
||||
sqlite_db = config.get_sqlite_db()
|
||||
screenshots_dir = config.get_screenshots_dir()
|
||||
pdfs_dir = config.get_pdfs_dir()
|
||||
projects_dir = config.get_projects_dir()
|
||||
logs_dir = config.get_logs_dir()
|
||||
|
||||
logger.info("Initializing Prerequisites Jobs...")
|
||||
os.makedirs(os.path.dirname(sqlite_db), exist_ok=True)
|
||||
os.makedirs(screenshots_dir, exist_ok=True)
|
||||
os.makedirs(pdfs_dir, exist_ok=True)
|
||||
os.makedirs(projects_dir, exist_ok=True)
|
||||
os.makedirs(logs_dir, exist_ok=True)
|
||||
|
||||
from src.bert.sentence import SentenceBert
|
||||
|
||||
logger.info("Loading sentence-transformer BERT models...")
|
||||
prompt = "Light-weight keyword extraction exercise for BERT model loading.".strip()
|
||||
SentenceBert(prompt).extract_keywords()
|
||||
logger.info("BERT model loaded successfully.")
|
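init_devika() is the process-startup hook. A hedged sketch of how an entrypoint might call it; the server wiring is assumed, since it is not part of this hunk:

```python
# Hypothetical entrypoint; the real server startup is defined elsewhere.
from src.init import init_devika

if __name__ == "__main__":
    init_devika()  # creates storage dirs and warms up the BERT model
    # ... start the web server here ...
```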
1
src/llm/__init__.py
Normal file
|
@@ -0,0 +1 @@
|
|||
from .llm import LLM
|
26
src/llm/claude_client.py
Normal file
|
@@ -0,0 +1,26 @@
|
|||
from anthropic import Anthropic
|
||||
|
||||
from src.config import Config
|
||||
|
||||
class Claude:
|
||||
def __init__(self):
|
||||
config = Config()
|
||||
api_key = config.get_claude_api_key()
|
||||
self.client = Anthropic(
|
||||
api_key=api_key,
|
||||
)
|
||||
|
||||
def inference(self, model_id: str, prompt: str) -> str:
|
||||
message = self.client.messages.create(
|
||||
max_tokens=4096,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt.strip(),
|
||||
}
|
||||
],
|
||||
model=model_id,
|
||||
temperature=0
|
||||
)
|
||||
|
||||
return message.content[0].text
|
33
src/llm/gemini_client.py
Normal file
|
@@ -0,0 +1,33 @@
|
|||
import google.generativeai as genai
|
||||
from google.generativeai.types import HarmCategory, HarmBlockThreshold
|
||||
|
||||
from src.config import Config
|
||||
|
||||
class Gemini:
|
||||
def __init__(self):
|
||||
config = Config()
|
||||
api_key = config.get_gemini_api_key()
|
||||
genai.configure(api_key=api_key)
|
||||
|
||||
def inference(self, model_id: str, prompt: str) -> str:
|
||||
config = genai.GenerationConfig(temperature=0)
|
||||
model = genai.GenerativeModel(model_id, generation_config=config)
|
||||
# Set safety settings for the request
|
||||
safety_settings = {
|
||||
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
|
||||
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
|
||||
# You can adjust other categories as needed
|
||||
}
|
||||
response = model.generate_content(prompt, safety_settings=safety_settings)
|
||||
try:
|
||||
# Check if the response contains text
|
||||
return response.text
|
||||
except ValueError:
|
||||
# If the response doesn't contain text, check if the prompt was blocked
|
||||
print("Prompt feedback:", response.prompt_feedback)
|
||||
# Also check the finish reason to see if the response was blocked
|
||||
print("Finish reason:", response.candidates[0].finish_reason)
|
||||
# If the finish reason was SAFETY, the safety ratings have more details
|
||||
print("Safety ratings:", response.candidates[0].safety_ratings)
|
||||
# Handle the error or return an appropriate message
|
||||
return "Error: Unable to generate content Gemini API"
|
24
src/llm/groq_client.py
Normal file
|
@@ -0,0 +1,24 @@
|
|||
from groq import Groq as _Groq
|
||||
|
||||
from src.config import Config
|
||||
|
||||
|
||||
class Groq:
|
||||
def __init__(self):
|
||||
config = Config()
|
||||
api_key = config.get_groq_api_key()
|
||||
self.client = _Groq(api_key=api_key)
|
||||
|
||||
def inference(self, model_id: str, prompt: str) -> str:
|
||||
chat_completion = self.client.chat.completions.create(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt.strip(),
|
||||
}
|
||||
],
|
||||
model=model_id,
|
||||
temperature=0
|
||||
)
|
||||
|
||||
return chat_completion.choices[0].message.content
|
149
src/llm/llm.py
Normal file
|
@@ -0,0 +1,149 @@
|
|||
import sys
|
||||
|
||||
import tiktoken
|
||||
from typing import List, Tuple
|
||||
|
||||
from src.socket_instance import emit_agent
|
||||
from .ollama_client import Ollama
|
||||
from .claude_client import Claude
|
||||
from .openai_client import OpenAi
|
||||
from .gemini_client import Gemini
|
||||
from .mistral_client import MistralAi
|
||||
from .groq_client import Groq
|
||||
|
||||
from src.state import AgentState
|
||||
|
||||
from src.config import Config
|
||||
from src.logger import Logger
|
||||
|
||||
TIKTOKEN_ENC = tiktoken.get_encoding("cl100k_base")
|
||||
|
||||
ollama = Ollama()
|
||||
logger = Logger()
|
||||
agentState = AgentState()
|
||||
config = Config()
|
||||
|
||||
|
||||
class LLM:
|
||||
def __init__(self, model_id: str = None):
|
||||
self.model_id = model_id
|
||||
self.log_prompts = config.get_logging_prompts()
|
||||
self.timeout_inference = config.get_timeout_inference()
|
||||
self.models = {
|
||||
"CLAUDE": [
|
||||
("Claude 3 Opus", "claude-3-opus-20240229"),
|
||||
("Claude 3 Sonnet", "claude-3-sonnet-20240229"),
|
||||
("Claude 3 Haiku", "claude-3-haiku-20240307"),
|
||||
],
|
||||
"OPENAI": [
|
||||
("GPT-4o", "gpt-4o"),
|
||||
("GPT-4 Turbo", "gpt-4-turbo"),
|
||||
("GPT-3.5 Turbo", "gpt-3.5-turbo-0125"),
|
||||
],
|
||||
"GOOGLE": [
|
||||
("Gemini 1.0 Pro", "gemini-pro"),
|
||||
("Gemini 1.5 Flash", "gemini-1.5-flash"),
|
||||
("Gemini 1.5 Pro", "gemini-1.5-pro"),
|
||||
],
|
||||
"MISTRAL": [
|
||||
("Mistral 7b", "open-mistral-7b"),
|
||||
("Mistral 8x7b", "open-mixtral-8x7b"),
|
||||
("Mistral Medium", "mistral-medium-latest"),
|
||||
("Mistral Small", "mistral-small-latest"),
|
||||
("Mistral Large", "mistral-large-latest"),
|
||||
],
|
||||
"GROQ": [
|
||||
("LLAMA3 8B", "llama3-8b-8192"),
|
||||
("LLAMA3 70B", "llama3-70b-8192"),
|
||||
("LLAMA2 70B", "llama2-70b-4096"),
|
||||
("Mixtral", "mixtral-8x7b-32768"),
|
||||
("GEMMA 7B", "gemma-7b-it"),
|
||||
],
|
||||
"OLLAMA": []
|
||||
}
|
||||
if ollama.client:
|
||||
self.models["OLLAMA"] = [(model["name"], model["name"]) for model in ollama.models]
|
||||
|
||||
def list_models(self) -> dict:
|
||||
return self.models
|
||||
|
||||
def model_enum(self, model_name: str) -> Tuple[str, str]:
|
||||
model_dict = {
|
||||
model[0]: (model_enum, model[1])
|
||||
for model_enum, models in self.models.items()
|
||||
for model in models
|
||||
}
|
||||
return model_dict.get(model_name, (None, None))
|
||||
|
||||
@staticmethod
|
||||
def update_global_token_usage(string: str, project_name: str):
|
||||
token_usage = len(TIKTOKEN_ENC.encode(string))
|
||||
agentState.update_token_usage(project_name, token_usage)
|
||||
|
||||
total = agentState.get_latest_token_usage(project_name) + token_usage
|
||||
emit_agent("tokens", {"token_usage": total})
|
||||
|
||||
def inference(self, prompt: str, project_name: str) -> str:
|
||||
self.update_global_token_usage(prompt, project_name)
|
||||
|
||||
model_enum, model_name = self.model_enum(self.model_id)
|
||||
|
||||
print(f"Model: {self.model_id}, Enum: {model_enum}")
|
||||
if model_enum is None:
|
||||
raise ValueError(f"Model {self.model_id} not supported")
|
||||
|
||||
model_mapping = {
|
||||
"OLLAMA": ollama,
|
||||
"CLAUDE": Claude(),
|
||||
"OPENAI": OpenAi(),
|
||||
"GOOGLE": Gemini(),
|
||||
"MISTRAL": MistralAi(),
|
||||
"GROQ": Groq()
|
||||
}
|
||||
|
||||
try:
|
||||
import concurrent.futures
|
||||
import time
|
||||
|
||||
start_time = time.time()
|
||||
model = model_mapping[model_enum]
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
future = executor.submit(model.inference, model_name, prompt)
|
||||
try:
|
||||
while True:
|
||||
elapsed_time = time.time() - start_time
|
||||
elapsed_seconds = format(elapsed_time, ".2f")
|
||||
emit_agent("inference", {"type": "time", "elapsed_time": elapsed_seconds})
|
||||
if int(elapsed_time) == 5:
|
||||
emit_agent("inference", {"type": "warning", "message": "Inference is taking longer than expected"})
|
||||
if elapsed_time > self.timeout_inference:
|
||||
raise concurrent.futures.TimeoutError
|
||||
if future.done():
|
||||
break
|
||||
time.sleep(0.5)
|
||||
|
||||
response = future.result(timeout=self.timeout_inference).strip()
|
||||
|
||||
except concurrent.futures.TimeoutError:
|
||||
logger.error(f"Inference failed. took too long. Model: {model_enum}, Model ID: {self.model_id}")
|
||||
emit_agent("inference", {"type": "error", "message": "Inference took too long. Please try again."})
|
||||
response = False
|
||||
sys.exit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(str(e))
|
||||
response = False
|
||||
emit_agent("inference", {"type": "error", "message": str(e)})
|
||||
sys.exit()
|
||||
|
||||
|
||||
except KeyError:
|
||||
raise ValueError(f"Model {model_enum} not supported")
|
||||
|
||||
if self.log_prompts:
|
||||
logger.debug(f"Response ({model}): --> {response}")
|
||||
|
||||
self.update_global_token_usage(response, project_name)
|
||||
|
||||
return response
|
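Note that model_enum() keys its lookup table on the display name (the first element of each tuple), so LLM is constructed with a display name rather than an API id. A hedged usage sketch, assuming agent state and the socket layer are already running:

```python
from src.llm import LLM

llm = LLM(model_id="Claude 3 Opus")  # display name, resolved via model_enum()
answer = llm.inference("Reply with a one-line greeting.", "example-project")
print(answer)
```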
22
src/llm/mistral_client.py
Normal file
|
@@ -0,0 +1,22 @@
|
|||
from mistralai.client import MistralClient
|
||||
from mistralai.models.chat_completion import ChatMessage
|
||||
|
||||
from src.config import Config
|
||||
|
||||
|
||||
class MistralAi:
|
||||
def __init__(self):
|
||||
config = Config()
|
||||
api_key = config.get_mistral_api_key()
|
||||
self.client = MistralClient(api_key=api_key)
|
||||
|
||||
def inference(self, model_id: str, prompt: str) -> str:
|
||||
print("prompt", prompt.strip())
|
||||
chat_completion = self.client.chat(
|
||||
model=model_id,
|
||||
messages=[
|
||||
ChatMessage(role="user", content=prompt.strip())
|
||||
],
|
||||
temperature=0
|
||||
)
|
||||
return chat_completion.choices[0].message.content
|
25
src/llm/ollama_client.py
Normal file
|
@@ -0,0 +1,25 @@
|
|||
import ollama
|
||||
from src.logger import Logger
|
||||
from src.config import Config
|
||||
|
||||
log = Logger()
|
||||
|
||||
|
||||
class Ollama:
|
||||
def __init__(self):
|
||||
try:
|
||||
self.client = ollama.Client(Config().get_ollama_api_endpoint())
|
||||
self.models = self.client.list()["models"]
|
||||
log.info("Ollama available")
|
||||
except Exception:
|
||||
self.client = None
|
||||
log.warning("Ollama not available")
|
||||
log.warning("run ollama server to use ollama models otherwise use API models")
|
||||
|
||||
def inference(self, model_id: str, prompt: str) -> str:
|
||||
response = self.client.generate(
|
||||
model=model_id,
|
||||
prompt=prompt.strip(),
|
||||
options={"temperature": 0}
|
||||
)
|
||||
return response['response']
|
24
src/llm/openai_client.py
Normal file
|
@@ -0,0 +1,24 @@
|
|||
from openai import OpenAI
|
||||
|
||||
from src.config import Config
|
||||
|
||||
|
||||
class OpenAi:
|
||||
def __init__(self):
|
||||
config = Config()
|
||||
api_key = config.get_openai_api_key()
|
||||
base_url = config.get_openai_api_base_url()
|
||||
self.client = OpenAI(api_key=api_key, base_url=base_url)
|
||||
|
||||
def inference(self, model_id: str, prompt: str) -> str:
|
||||
chat_completion = self.client.chat.completions.create(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt.strip(),
|
||||
}
|
||||
],
|
||||
model=model_id,
|
||||
temperature=0
|
||||
)
|
||||
return chat_completion.choices[0].message.content
|
78
src/logger.py
Normal file
|
@@ -0,0 +1,78 @@
|
|||
from functools import wraps
|
||||
|
||||
from fastlogging import LogInit
|
||||
from flask import request
|
||||
|
||||
from src.config import Config
|
||||
|
||||
|
||||
class Logger:
|
||||
def __init__(self, filename="devika_agent.log"):
|
||||
config = Config()
|
||||
logs_dir = config.get_logs_dir()
|
||||
self.logger = LogInit(pathName=logs_dir + "/" + filename, console=True, colors=True, encoding="utf-8")
|
||||
|
||||
def read_log_file(self) -> str:
|
||||
with open(self.logger.pathName, "r") as file:
|
||||
return file.read()
|
||||
|
||||
def info(self, message: str):
|
||||
self.logger.info(message)
|
||||
self.logger.flush()
|
||||
|
||||
def error(self, message: str):
|
||||
self.logger.error(message)
|
||||
self.logger.flush()
|
||||
|
||||
def warning(self, message: str):
|
||||
self.logger.warning(message)
|
||||
self.logger.flush()
|
||||
|
||||
def debug(self, message: str):
|
||||
self.logger.debug(message)
|
||||
self.logger.flush()
|
||||
|
||||
def exception(self, message: str):
|
||||
self.logger.exception(message)
|
||||
self.logger.flush()
|
||||
|
||||
|
||||
def route_logger(logger: Logger):
|
||||
"""
|
||||
Decorator factory that creates a decorator to log route entry and exit points.
|
||||
The decorator uses the provided logger to log the information.
|
||||
|
||||
:param logger: The logger instance to use for logging.
|
||||
"""
|
||||
|
||||
log_enabled = Config().get_logging_rest_api()
|
||||
|
||||
def decorator(func):
|
||||
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
# Log entry point
|
||||
if log_enabled:
|
||||
logger.info(f"{request.path} {request.method}")
|
||||
|
||||
# Call the actual route function
|
||||
response = func(*args, **kwargs)
|
||||
|
||||
from werkzeug.wrappers import Response
|
||||
|
||||
# Log exit point, including response summary if possible
|
||||
try:
|
||||
if log_enabled:
|
||||
if isinstance(response, Response) and response.direct_passthrough:
|
||||
logger.debug(f"{request.path} {request.method} - Response: File response")
|
||||
else:
|
||||
response_summary = response.get_data(as_text=True)
|
||||
if 'settings' in request.path:
|
||||
response_summary = "*** Settings are not logged ***"
|
||||
logger.debug(f"{request.path} {request.method} - Response: {response_summary}")
|
||||
except Exception as e:
|
||||
logger.exception(f"{request.path} {request.method} - {e})")
|
||||
|
||||
return response
|
||||
return wrapper
|
||||
return decorator
|
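A minimal sketch of wiring route_logger into a Flask route; the app and endpoint are illustrative:

```python
from flask import Flask
from src.logger import Logger, route_logger

app = Flask(__name__)
logger = Logger()

@app.route("/api/status", methods=["GET"])
@route_logger(logger)
def status():
    # Entry, exit, and the response body are logged when LOG_REST_API is "true".
    return {"status": "ok"}
```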
1
src/memory/__init__.py
Normal file
|
@@ -0,0 +1 @@
|
|||
from .knowledge_base import KnowledgeBase
|
33
src/memory/knowledge_base.py
Normal file
|
@@ -0,0 +1,33 @@
|
|||
from typing import Optional
|
||||
from sqlmodel import Field, Session, SQLModel, create_engine
|
||||
|
||||
from src.config import Config
|
||||
|
||||
"""
|
||||
TODO: The tag check should be a BM25 search, it's just a simple equality check now.
|
||||
"""
|
||||
|
||||
class Knowledge(SQLModel, table=True):
|
||||
id: Optional[int] = Field(default=None, primary_key=True)
|
||||
tag: str
|
||||
contents: str
|
||||
|
||||
class KnowledgeBase:
|
||||
def __init__(self):
|
||||
config = Config()
|
||||
sqlite_path = config.get_sqlite_db()
|
||||
self.engine = create_engine(f"sqlite:///{sqlite_path}")
|
||||
SQLModel.metadata.create_all(self.engine)
|
||||
|
||||
def add_knowledge(self, tag: str, contents: str):
|
||||
knowledge = Knowledge(tag=tag, contents=contents)
|
||||
with Session(self.engine) as session:
|
||||
session.add(knowledge)
|
||||
session.commit()
|
||||
|
||||
def get_knowledge(self, tag: str) -> str:
|
||||
with Session(self.engine) as session:
|
||||
knowledge = session.query(Knowledge).filter(Knowledge.tag == tag).first()
|
||||
if knowledge:
|
||||
return knowledge.contents
|
||||
return None
|
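A minimal usage sketch; the tag and contents are illustrative. As the TODO notes, retrieval is an exact tag match until the BM25 search lands:

```python
from src.memory import KnowledgeBase

kb = KnowledgeBase()
kb.add_knowledge("flask", "Use blueprints to modularize routes.")
print(kb.get_knowledge("flask"))  # "Use blueprints to modularize routes."
```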
3
src/memory/rag.py
Normal file
|
@@ -0,0 +1,3 @@
|
|||
"""
|
||||
Vector Search for Code Docs + Docs Loading
|
||||
"""
|
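The stub above only states intent. One possible shape, sketched under the assumption that sentence-transformers (already used by SentenceBert in init.py) provides the embeddings; every name here is hypothetical:

```python
import numpy as np
from sentence_transformers import SentenceTransformer

class DocVectorSearch:
    """Hypothetical sketch: embed doc chunks once, rank them per query."""

    def __init__(self, chunks: list[str]):
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        self.chunks = chunks
        self.vectors = self.model.encode(chunks, normalize_embeddings=True)

    def top_k(self, query: str, k: int = 3) -> list[str]:
        q = self.model.encode([query], normalize_embeddings=True)[0]
        scores = self.vectors @ q  # cosine similarity on unit vectors
        return [self.chunks[i] for i in np.argsort(scores)[::-1][:k]]
```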
Some files were not shown because too many files have changed in this diff.