diff --git a/README.md b/README.md
index 1ca8faf..bd7d773 100644
--- a/README.md
+++ b/README.md
@@ -8,20 +8,23 @@
 ---
 
-**Open Codex** is a fully open-source command-line AI assistant inspired by OpenAI Codex, supporting local language models like `phi-4-mini`.
+**Open Codex** is a fully open-source command-line AI assistant inspired by OpenAI Codex, supporting optimized local language models.
 
-No API key is required. Everything runs locally.
+No API key is required for the default model. Everything runs locally.
 
 Supports:
 - **One-shot mode**: `open-codex "list all folders"` -> returns shell command
-- 🧠 Local-only execution using supported OS models (currently `phi-4-mini`)
+- 🧠 Local-only execution using optimized models:
+  - phi-4-mini (default, no auth required)
+  - qwen-2.5-coder (requires a Hugging Face token, tuned for coding tasks)
 
 ---
 
 ## ✨ Features
 
 - Natural Language to Shell Command (via local models)
 - Works on macOS, Linux, and Windows (Python-based)
-- Confirmation before execution
+- Smart command validation and error handling
+- Real-time command output streaming
 - Add to clipboard / abort / execute prompt
 - One-shot interaction mode (interactive and function-calling coming soon)
 - Colored terminal output for better readability
 
@@ -76,13 +79,42 @@ Once installed, you can use the `open-codex` CLI globally.
 
 ### One-shot mode
 
+Basic usage with the default model (phi-4-mini):
 ```bash
-open-codex "untar file abc.tar"
+open-codex "list all python files"
 ```
 
-✅ Codex suggests a shell command
+Using the Qwen model for more involved coding tasks:
+```bash
+# First, set your Hugging Face token
+export HUGGINGFACE_TOKEN=your_token_here
+
+# Then use the Qwen model
+open-codex --model qwen-2.5-coder "find python files modified today"
+
+# Or provide token directly
+open-codex --model qwen-2.5-coder --hf-token your_token_here "your command"
+```
+
+✅ Codex suggests a validated shell command
+✅ Shows real-time command output
+✅ Provides clear error messages
 ✅ Asks for confirmation / add to clipboard / abort
-✅ Executes if approved
+✅ Executes if approved
+
+### Model Overview
+
+#### phi-4-mini (default)
+- Fast and lightweight
+- No authentication required
+- Optimized for quick shell commands
+- Best for basic file operations and system tasks
+
+#### qwen-2.5-coder (Qwen2.5-Coder-1.5B-Instruct)
+- Tuned for coding tasks
+- Requires Hugging Face authentication
+- Improved command validation
+- Better for complex development tasks
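+
+If you prefer to call Open Codex from Python instead of the CLI, the same model selection is available through `AgentBuilder.get_agent`. The snippet below is an illustrative sketch based on the current `get_agent` signature; adjust the prompts to your needs and supply a token via `hf_token` or the `HUGGINGFACE_TOKEN` environment variable for the Qwen model:
+
+```python
+from open_codex.agent_builder import AgentBuilder
+
+# Default model, no token required
+agent = AgentBuilder.get_agent(model="phi-4-mini")
+print(agent.one_shot_mode("list all folders"))
+
+# Qwen model; the token can also come from the HUGGINGFACE_TOKEN env var
+qwen = AgentBuilder.get_agent(model="qwen-2.5-coder", hf_token="your_token_here")
+print(qwen.one_shot_mode("find python files modified today"))
+```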
 
 ---
 
diff --git a/src/open_codex/agent_builder.py b/src/open_codex/agent_builder.py
index 0e35aac..edce06a 100644
--- a/src/open_codex/agent_builder.py
+++ b/src/open_codex/agent_builder.py
@@ -1,14 +1,23 @@
 from importlib.resources import files
+from typing import Literal, Optional
 
 from open_codex.agents.phi_4_mini import AgentPhi4Mini
+from open_codex.agents.qwen_25_coder import AgentQwen25Coder
 from open_codex.interfaces.llm_agent import LLMAgent
 
-class AgentBuilder:
+ModelType = Literal["phi-4-mini", "qwen-2.5-coder"]
+
+class AgentBuilder:
 
     @staticmethod
-    def get_agent() -> LLMAgent:
+    def get_agent(model: ModelType = "phi-4-mini", hf_token: Optional[str] = None) -> LLMAgent:
         system_prompt = files("open_codex.resources").joinpath("prompt.txt").read_text(encoding="utf-8")
-        return AgentPhi4Mini(system_prompt=system_prompt)
+
+        if model == "phi-4-mini":
+            return AgentPhi4Mini(system_prompt=system_prompt)
+        elif model == "qwen-2.5-coder":
+            return AgentQwen25Coder(system_prompt=system_prompt, hf_token=hf_token)
+        else:
+            raise ValueError(f"Unsupported model: {model}")
 
     @staticmethod
     def read_file(file_path: str) -> str:
diff --git a/src/open_codex/agents/phi_4_mini.py b/src/open_codex/agents/phi_4_mini.py
index 376ab14..6f83b9a 100644
--- a/src/open_codex/agents/phi_4_mini.py
+++ b/src/open_codex/agents/phi_4_mini.py
@@ -1,6 +1,7 @@
 import contextlib
 import os
 import time
+import multiprocessing
 from typing import List, cast
 
 from huggingface_hub import hf_hub_download  # type: ignore
@@ -13,12 +14,10 @@ def download_model(self, model_filename: str,
                        repo_id: str,
                        local_dir: str) -> str:
         print(
-            "\n🤖 Thank you for using Open Codex!\n"
-            "📦 For the first run, we need to download the model from Hugging Face.\n"
-            "⏬ This only happens once – it’ll be cached locally for future use.\n"
-            "🔄 Sit tight, the download will begin now...\n"
+            "\n🤖 Welcome to Open Codex!\n"
+            "📦 First run requires downloading the model.\n"
+            "⚡️ This model is optimized for quick responses.\n"
         )
-        print("\n⏬ Downloading model phi4-mini ...")
 
         start = time.time()
         model_path:str = hf_hub_download(
@@ -26,8 +25,8 @@ def download_model(self, model_filename: str,
             filename=model_filename,
             local_dir=local_dir,
         )
-        end = time.time()
-        print(f"✅ Model downloaded in {end - start:.2f}s\n")
+        duration = time.time() - start
+        print(f"✅ Model downloaded ({duration:.1f}s)")
         return model_path
 
     def __init__(self, system_prompt: str):
@@ -36,41 +35,53 @@ def __init__(self, system_prompt: str):
         local_dir = os.path.expanduser("~/.cache/open-codex")
         model_path = os.path.join(local_dir, model_filename)
 
-        # check if the model is already downloaded
         if not os.path.exists(model_path):
-            # download the model
             model_path = self.download_model(model_filename, repo_id, local_dir)
         else:
-            print(f"We are locking and loading the model for you...\n")
+            print("🚀 Loading Phi-4-mini model...")
+
+        # Get optimal thread count for the system
+        n_threads = min(4, multiprocessing.cpu_count())
 
-        # suppress the stderr output from llama_cpp
-        # this is a workaround for the llama_cpp library
-        # which prints a lot of warnings and errors to stderr
-        # when loading the model
-        # this is a temporary solution until the library is fixed
         with AgentPhi4Mini.suppress_native_stderr():
-            lib_dir = os.path.join(os.path.dirname(__file__), "llama_cpp", "lib")
-            self.llm: Llama = Llama(
-                lib_path=os.path.join(lib_dir, "libllama.dylib"),
-                model_path=model_path)
+            lib_path = os.path.join(os.path.dirname(__file__), "llama_cpp", "lib", "libllama.dylib")
+            llama_kwargs = {
+                "model_path": model_path,
+                "n_ctx": 2048,
+                "n_threads": n_threads,
+                "n_batch": 256,
+                "use_mlock": True,
+                "use_mmap": True,
+            }
 
-        self.system_prompt = system_prompt
+            if os.path.exists(lib_path):
+                llama_kwargs["lib_path"] = lib_path
+
+            self.llm: Llama = Llama(**llama_kwargs)
+            print("✨ Model ready!")
 
+        self.system_prompt = system_prompt
+
     def one_shot_mode(self, user_input: str) -> str:
         chat_history = [{"role": "system", "content": self.system_prompt}]
         chat_history.append({"role": "user", "content": user_input})
         full_prompt = self.format_chat(chat_history)
+
         with AgentPhi4Mini.suppress_native_stderr():
-            output_raw = self.llm(prompt=full_prompt, max_tokens=100, temperature=0.2, stream=False)
+            output_raw = self.llm(
+                prompt=full_prompt,
+                max_tokens=100,
+                temperature=0.2,
+                stream=False,
+                top_p=0.1,          # More focused responses
+                repeat_penalty=1.1  # Reduce repetition
+            )
 
-        # unfortuntely llama_cpp has a union type for the output
         output = cast(CreateCompletionResponse, output_raw)
-
-        assistant_reply : str = output["choices"][0]["text"].strip()
-        return assistant_reply
-
+        assistant_reply: str = output["choices"][0]["text"].strip()
+        return assistant_reply
+
     def format_chat(self, messages: List[dict[str, str]]) -> str:
         chat_prompt = ""
         for msg in messages:
diff --git a/src/open_codex/agents/qwen_25_coder.py b/src/open_codex/agents/qwen_25_coder.py
new file mode 100644
index 0000000..0ec8f38
--- /dev/null
+++ b/src/open_codex/agents/qwen_25_coder.py
@@ -0,0 +1,124 @@
+import time
+import os
+import multiprocessing
+from typing import cast, Optional, List
+from llama_cpp import CreateCompletionResponse, Llama
+from open_codex.interfaces.llm_agent import LLMAgent
+import contextlib
+from huggingface_hub import hf_hub_download
+
+class AgentQwen25Coder(LLMAgent):
+    def download_model(self, model_filename: str,
+                       repo_id: str,
+                       local_dir: str,
+                       token: Optional[str] = None) -> str:
+        print(
+            "\n🤖 Welcome to Open Codex!\n"
+            "📦 First run requires downloading the model.\n"
+            "⚡️ This model is tuned for coding tasks.\n"
+        )
+
+        start = time.time()
+        model_path: str = hf_hub_download(
+            repo_id=repo_id,
+            filename=model_filename,
+            local_dir=local_dir,
+            token=token,
+            force_download=True,  # Force download to ensure the latest version
+        )
+        duration = time.time() - start
+        print(f"✅ Model downloaded ({duration:.1f}s)")
+        return model_path
+
+    def __init__(self, system_prompt: str, hf_token: Optional[str] = None):
+        model_filename = "Qwen2.5-Coder-1.5B-Instruct-F16.gguf"
+        repo_id = "unsloth/Qwen2.5-Coder-1.5B-Instruct-GGUF"  # unsloth's GGUF build of Qwen2.5-Coder-1.5B-Instruct
+        local_dir = os.path.expanduser("~/.cache/open-codex")
+        model_path = os.path.join(local_dir, model_filename)
+
+        if not hf_token:
+            hf_token = os.environ.get("HUGGINGFACE_TOKEN")
+
+        if not os.path.exists(model_path):
+            model_path = self.download_model(model_filename, repo_id, local_dir, token=hf_token)
+        else:
+            print("🚀 Loading Qwen model...\n")
+
+        # Get optimal thread count for the system
+        n_threads = min(4, multiprocessing.cpu_count())
+
+        with AgentQwen25Coder.suppress_native_stderr():
+            self.llm: Llama = Llama(
+                model_path=model_path,
+                n_ctx=2048,           # Smaller context for faster responses
+                n_threads=n_threads,  # Use optimal thread count
+                n_batch=256,          # Balanced batch size
+                use_mlock=True,       # Lock memory to prevent swapping
+                use_mmap=True,        # Use memory mapping for faster loading
+            )
+        print("✨ Model ready!")
+
+        self.system_prompt = system_prompt
+
+    def one_shot_mode(self, user_input: str) -> str:
+        # One-turn example keeps the model anchored on returning a bare shell command
+        chat_history = [
+            {"role": "system", "content": self.system_prompt},
+            {"role": "user", "content": "I need a shell command to find all python files"},
+            {"role": "assistant", "content": "find . -name \"*.py\""},
+            {"role": "user", "content": user_input}
+        ]
+        full_prompt = self.format_chat(chat_history)
+
+        with AgentQwen25Coder.suppress_native_stderr():
+            try:
+                output_raw = self.llm(
+                    prompt=full_prompt,
+                    max_tokens=100,      # Limit response length
+                    temperature=0.1,     # Lower temperature for more deterministic output
+                    top_p=0.1,           # Focus on most likely tokens
+                    top_k=10,            # Limit vocabulary for shell commands
+                    repeat_penalty=1.1,  # Prevent repetition
+                    stop=["<|im_end|>", "<|im_start|>", "\n"],  # Stop at chat-template boundaries
+                    stream=False
+                )
+
+                output = cast(CreateCompletionResponse, output_raw)
+                assistant_reply: str = output["choices"][0]["text"].strip()
+
+                # Clean up the response: keep the first line and strip template tokens
+                assistant_reply = assistant_reply.split('\n')[0].strip()
+                assistant_reply = assistant_reply.replace("<|im_end|>", "").strip()
+
+                # Basic sanity check: reject output that still contains chat-template markup
+                if "<|" in assistant_reply or "|>" in assistant_reply:
+                    print("⚠️ Model returned malformed output")
+                    return ""
+
+                return assistant_reply
+
+            except Exception as e:
+                print(f"⚠️ Model error: {str(e)}")
+                return ""
+
+    def format_chat(self, messages: List[dict[str, str]]) -> str:
+        # Qwen2.5 instruct models use the ChatML template:
+        #   <|im_start|>{role}\n{content}<|im_end|>
+        chat_prompt = ""
+        for msg in messages:
+            chat_prompt += f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
+        chat_prompt += "<|im_start|>assistant\n"
+        return chat_prompt
+
+    @staticmethod
+    @contextlib.contextmanager
+    def suppress_native_stderr():
+        """
+        Redirect C-level stderr (fd 2) into /dev/null, so llama.cpp logs vanish.
+        """
+        devnull_fd = os.open(os.devnull, os.O_WRONLY)
+        saved_stderr_fd = os.dup(2)
+        try:
+            os.dup2(devnull_fd, 2)
+            yield
+        finally:
+            os.dup2(saved_stderr_fd, 2)
+            os.close(devnull_fd)
+            os.close(saved_stderr_fd)
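Side note for reviewers (illustrative, not part of the patch): the ChatML framing that `format_chat` produces for Qwen2.5 instruct models looks like the output of the sketch below. The helper name `build_prompt` is hypothetical and simply mirrors the method's logic.

```python
# Mirrors AgentQwen25Coder.format_chat for illustration; not used by the CLI.
def build_prompt(messages: list[dict[str, str]]) -> str:
    prompt = ""
    for msg in messages:
        prompt += f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
    return prompt + "<|im_start|>assistant\n"

print(build_prompt([
    {"role": "system", "content": "Reply with a single shell command."},
    {"role": "user", "content": "list all python files"},
]))
# <|im_start|>system
# Reply with a single shell command.<|im_end|>
# <|im_start|>user
# list all python files<|im_end|>
# <|im_start|>assistant
```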
diff --git a/src/open_codex/main.py b/src/open_codex/main.py
index 73d218c..f389940 100644
--- a/src/open_codex/main.py
+++ b/src/open_codex/main.py
@@ -1,21 +1,38 @@
 import sys
 import argparse
 import subprocess
+import pyperclip
+import shutil
+import os
+from datetime import datetime
+
+from open_codex.agent_builder import AgentBuilder, ModelType
+from open_codex.interfaces.llm_agent import LLMAgent
 
+# ANSI color codes
 GREEN = "\033[92m"
 RED = "\033[91m"
 BLUE = "\033[94m"
+YELLOW = "\033[93m"
+BOLD = "\033[1m"
 RESET = "\033[0m"
 
-# Capture single keypress (terminal) from the user
-# and returns it as a string. It works on both Windows and Unix systems.
+# Get terminal width for better formatting
+TERM_WIDTH = shutil.get_terminal_size().columns
+
+def print_banner(text: str, color: str = BLUE, char: str = "=") -> None:
+    padding = char * ((TERM_WIDTH - len(text) - 2) // 2)
+    print(f"{color}{padding} {text} {padding}{RESET}")
+
+def print_timestamp(prefix: str = "") -> None:
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    print(f"{YELLOW}{prefix}[{timestamp}]{RESET}")
 
-# Windows
+# Capture single keypress
 if sys.platform == "win32":
     import msvcrt
     def get_keypress():
         return msvcrt.getch().decode("utf-8")
-# Unix
 else:
     import termios, tty
     def get_keypress():
@@ -29,60 +46,123 @@ def get_keypress():
         return key
 
 def print_response(command: str):
-    print(f"{BLUE}Command found:\n=====================")
-    print(f"{GREEN}{command}{RESET}")
-    print(f"{BLUE}====================={RESET}")
-    print(f"{BLUE}What do you want to do with this command?{RESET}")
-    print(f"{BLUE}[c] Copy [e] Execute [a] Abort{RESET}")
-    print(f"{BLUE}Press key: ", end="", flush=True)
+    print_banner("Command Found")
+    print(f"{GREEN}{BOLD}{command}{RESET}")
+    print_banner("Options")
+
+    print(f"{BLUE}What would you like to do?{RESET}")
+    print(f"{BOLD}[e]{RESET} Execute command")
+    print(f"{BOLD}[c]{RESET} Copy to clipboard")
+    print(f"{BOLD}[a]{RESET} Abort")
+    print(f"\n{BLUE}Press key: ", end="", flush=True)
 
     choice = get_keypress().lower()
     print(f"{RESET}")
 
     if choice == "e":
-        print(f"{BLUE}Executing command: {command}{RESET}")
-        result = subprocess.run(command, shell=True, capture_output=True, text=True)
-        print(f"{GREEN}Command output: {result.stdout}{RESET}")
-        if result.stderr:
-            print(f"{RED}Error: {result.stderr}{RESET}")
+        print_banner("Executing Command")
+        print_timestamp()
+        print(f"{BLUE}Running: {command}{RESET}\n")
+
+        try:
+            process = subprocess.Popen(
+                command,
+                shell=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                bufsize=1,
+                universal_newlines=True,
+                cwd=os.getcwd(),
+                env=os.environ.copy()
+            )
+
+            # stdout and stderr are read alternately, one line at a time;
+            # output from commands that only write to stderr may appear
+            # only once the command finishes.
+            permission_error = False
+            while True:
+                stdout_line = process.stdout.readline() if process.stdout else ""
+                if stdout_line:
+                    print(f"{GREEN}{stdout_line.rstrip()}{RESET}")
+
+                stderr_line = process.stderr.readline() if process.stderr else ""
+                if stderr_line:
+                    if "Permission denied" in stderr_line:
+                        permission_error = True
+                    print(f"{RED}{stderr_line.rstrip()}{RESET}")
+
+                if process.poll() is not None and not stdout_line and not stderr_line:
+                    break
+
+            if process.returncode == 0:
+                print(f"\n{GREEN}✓ Command completed successfully{RESET}")
+            else:
+                error_msg = "✗ Command failed"
+                if permission_error:
+                    error_msg += " due to permission issues. Try:\n"
+                    error_msg += f"  1. Using sudo (if appropriate)\n"
+                    error_msg += f"  2. Checking file/directory permissions\n"
+                    error_msg += f"  3. Running from a directory you have access to"
+                else:
+                    error_msg += f" with exit code {process.returncode}"
+                print(f"\n{RED}{error_msg}{RESET}")
+
+        except Exception as e:
+            print(f"\n{RED}✗ Error executing command: {str(e)}{RESET}")
+
+        print_timestamp("Finished at ")
+
     elif choice == "c":
-        import pyperclip  # ⏱ lazy import
         pyperclip.copy(command)
-        print(f"{GREEN}Command copied to clipboard!{RESET}")
+        print(f"{GREEN}✓ Command copied to clipboard!{RESET}")
+
     elif choice == "a":
-        print(f"{BLUE}Aborted.{RESET}")
+        print(f"{BLUE}Operation aborted.{RESET}")
     else:
         print(f"{RED}Unknown choice. Nothing happened.{RESET}")
 
-def one_shot_mode(prompt: str):
-    from open_codex.agent_builder import AgentBuilder
-    print(f"{BLUE}Using model: phi-4-mini-instruct{RESET}")
+def one_shot_mode(agent: LLMAgent, model: ModelType, prompt: str):
+    print(f"{BLUE}Using model: {model}{RESET}")
     try:
-        agent = AgentBuilder.get_agent()
         response = agent.one_shot_mode(prompt)
         print_response(response)
     except Exception as e:
         print(f"{RED}Error: {e}{RESET}")
 
 def print_help_message():
+    print_banner("Open Codex - Natural Language to CLI commands")
     print(f"{BLUE}Usage examples:{RESET}")
     print(f"{GREEN}open-codex \"list all files in current directory\"")
-    print(f"{GREEN}open-codex \"find all python files modified in the last week\"")
+    print(f"{GREEN}open-codex --model qwen-2.5-coder --hf-token YOUR_TOKEN \"find python files\"")
    print(f"{GREEN}open-codex \"create a tarball of the src directory\"")
     print()
+    print(f"{BLUE}Available models:{RESET}")
+    print(f"{GREEN}  - phi-4-mini (default)")
+    print(f"{GREEN}  - qwen-2.5-coder (requires Hugging Face authentication)")
+    print()
+    print(f"{BLUE}Authentication:{RESET}")
+    print(f"{GREEN}For qwen-2.5-coder, you can provide your Hugging Face token:")
+    print(f"{GREEN}1. Via environment variable: export HUGGINGFACE_TOKEN=your_token")
+    print(f"{GREEN}2. Via command line: --hf-token your_token")
+    print()
 
 def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("prompt", nargs="*", help="Natural language prompt")
+    parser = argparse.ArgumentParser(description="Open Codex - Natural Language to CLI commands")
+    parser.add_argument("prompt", nargs="*", help="Optional prompt for one-shot mode")
+    parser.add_argument("--model", type=str, choices=["phi-4-mini", "qwen-2.5-coder"],
+                        default="phi-4-mini", help="Choose the model to use")
+    parser.add_argument("--hf-token", type=str, help="Hugging Face API token for authenticated models")
     args = parser.parse_args()
     prompt = " ".join(args.prompt).strip()
 
     if not prompt or prompt == "--help":
         print_help_message()
-        sys.exit(0)
-
-    print(f"{BLUE}Prompt: {prompt}{RESET}", flush=True)
-    one_shot_mode(prompt)
+        sys.exit(1)
+
+    try:
+        agent = AgentBuilder.get_agent(model=args.model, hf_token=args.hf_token)
+        one_shot_mode(agent, args.model, prompt)
+    except ValueError as e:
+        print(f"{RED}Error: {str(e)}{RESET}")
+        sys.exit(1)
 
 if __name__ == "__main__":
     # We call multiprocessing.freeze_support() because we are using PyInstaller to build a frozen binary.
@@ -95,4 +175,4 @@ def main():
     # See: https://pyinstaller.org/en/stable/common-issues-and-pitfalls.html#when-to-call-multiprocessing-freeze-support
     from multiprocessing import freeze_support
     freeze_support()
-    main()
\ No newline at end of file
+    main()