diff --git a/README.md b/README.md
index 1ca8faf..bd7d773 100644
--- a/README.md
+++ b/README.md
@@ -8,20 +8,23 @@
 ---
 
-**Open Codex** is a fully open-source command-line AI assistant inspired by OpenAI Codex, supporting local language models like `phi-4-mini`.
+**Open Codex** is a fully open-source command-line AI assistant inspired by OpenAI Codex, supporting optimized local language models.
 
-No API key is required. Everything runs locally.
+No API key is required for the default model. Everything runs locally.
 
 Supports:
 - **One-shot mode**: `open-codex "list all folders"` -> returns shell command
-- 🧠 Local-only execution using supported OS models (currently `phi-4-mini`)
+- 🧠 Local-only execution using optimized models:
+  - phi-4-mini (default, no auth required)
+  - qwen-2.5-coder (requires a Hugging Face token, tuned for coding tasks)
 
 ---
 
 ## ✨ Features
 
 - Natural Language to Shell Command (via local models)
 - Works on macOS, Linux, and Windows (Python-based)
-- Confirmation before execution
+- Smart command validation and error handling
+- Real-time command output streaming
 - Add to clipboard / abort / execute prompt
 - One-shot interaction mode (interactive and function-calling coming soon)
 - Colored terminal output for better readability
 
@@ -76,13 +79,42 @@ Once installed, you can use the `open-codex` CLI globally.
 
 ### One-shot mode
 
+Basic usage with the default model (phi-4-mini):
 ```bash
-open-codex "untar file abc.tar"
+open-codex "list all python files"
 ```
 
-✅ Codex suggests a shell command
+Using the Qwen model for more involved coding tasks:
+```bash
+# First, set your Hugging Face token
+export HUGGINGFACE_TOKEN=your_token_here
+
+# Then use the Qwen model
+open-codex --model qwen-2.5-coder "find python files modified today"
+
+# Or provide token directly
+open-codex --model qwen-2.5-coder --hf-token your_token_here "your command"
+```
+
+✅ Codex suggests a validated shell command
+✅ Shows real-time command output
+✅ Provides clear error messages
 ✅ Asks for confirmation / add to clipboard / abort
-✅ Executes if approved
+✅ Executes if approved
+
+### Model Overview
+
+#### phi-4-mini (default)
+- Fast and lightweight
+- No authentication required
+- Optimized for quick shell commands
+- Best for basic file operations and system tasks
+
+#### qwen-2.5-coder (Qwen2.5-Coder-1.5B-Instruct)
+- Tuned for coding tasks
+- Requires Hugging Face authentication
+- Improved command validation
+- Better for complex development tasks
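+
+If you prefer to call Open Codex from Python instead of the CLI, the same model selection is available through `AgentBuilder.get_agent`. The snippet below is an illustrative sketch based on the current `get_agent` signature; adjust the prompts to your needs and supply a token via `hf_token` or the `HUGGINGFACE_TOKEN` environment variable for the Qwen model:
+
+```python
+from open_codex.agent_builder import AgentBuilder
+
+# Default model, no token required
+agent = AgentBuilder.get_agent(model="phi-4-mini")
+print(agent.one_shot_mode("list all folders"))
+
+# Qwen model; the token can also come from the HUGGINGFACE_TOKEN env var
+qwen = AgentBuilder.get_agent(model="qwen-2.5-coder", hf_token="your_token_here")
+print(qwen.one_shot_mode("find python files modified today"))
+```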
 
 ---
 
diff --git a/src/open_codex/agent_builder.py b/src/open_codex/agent_builder.py
index 0e35aac..edce06a 100644
--- a/src/open_codex/agent_builder.py
+++ b/src/open_codex/agent_builder.py
@@ -1,14 +1,23 @@
 from importlib.resources import files
+from typing import Literal, Optional
 
 from open_codex.agents.phi_4_mini import AgentPhi4Mini
+from open_codex.agents.qwen_25_coder import AgentQwen25Coder
 from open_codex.interfaces.llm_agent import LLMAgent
 
-class AgentBuilder:
+ModelType = Literal["phi-4-mini", "qwen-2.5-coder"]
+
+class AgentBuilder:
 
     @staticmethod
-    def get_agent() -> LLMAgent:
+    def get_agent(model: ModelType = "phi-4-mini", hf_token: Optional[str] = None) -> LLMAgent:
         system_prompt = files("open_codex.resources").joinpath("prompt.txt").read_text(encoding="utf-8")
-        return AgentPhi4Mini(system_prompt=system_prompt)
+
+        if model == "phi-4-mini":
+            return AgentPhi4Mini(system_prompt=system_prompt)
+        elif model == "qwen-2.5-coder":
+            return AgentQwen25Coder(system_prompt=system_prompt, hf_token=hf_token)
+        else:
+            raise ValueError(f"Unsupported model: {model}")
 
     @staticmethod
     def read_file(file_path: str) -> str:
diff --git a/src/open_codex/agents/phi_4_mini.py b/src/open_codex/agents/phi_4_mini.py
index 376ab14..6f83b9a 100644
--- a/src/open_codex/agents/phi_4_mini.py
+++ b/src/open_codex/agents/phi_4_mini.py
@@ -1,6 +1,7 @@
 import contextlib
 import os
 import time
+import multiprocessing
 from typing import List, cast
 
 from huggingface_hub import hf_hub_download  # type: ignore
@@ -13,12 +14,10 @@ def download_model(self, model_filename: str,
                        repo_id: str,
                        local_dir: str) -> str:
         print(
-            "\n🤖 Thank you for using Open Codex!\n"
-            "📦 For the first run, we need to download the model from Hugging Face.\n"
-            "⏬ This only happens once – it’ll be cached locally for future use.\n"
-            "🔄 Sit tight, the download will begin now...\n"
+            "\n🤖 Welcome to Open Codex!\n"
+            "📦 First run requires downloading the model.\n"
+            "⚡️ This model is optimized for quick responses.\n"
         )
-        print("\n⏬ Downloading model phi4-mini ...")
 
         start = time.time()
         model_path:str = hf_hub_download(
@@ -26,8 +25,8 @@ def download_model(self, model_filename: str,
             filename=model_filename,
             local_dir=local_dir,
         )
-        end = time.time()
-        print(f"✅ Model downloaded in {end - start:.2f}s\n")
+        duration = time.time() - start
+        print(f"✅ Model downloaded ({duration:.1f}s)")
         return model_path
 
     def __init__(self, system_prompt: str):
@@ -36,41 +35,53 @@ def __init__(self, system_prompt: str):
         local_dir = os.path.expanduser("~/.cache/open-codex")
         model_path = os.path.join(local_dir, model_filename)
 
-        # check if the model is already downloaded
         if not os.path.exists(model_path):
-            # download the model
             model_path = self.download_model(model_filename, repo_id, local_dir)
         else:
-            print(f"We are locking and loading the model for you...\n")
+            print("🚀 Loading Phi-4-mini model...")
+
+        # Get optimal thread count for the system
+        n_threads = min(4, multiprocessing.cpu_count())
 
-        # suppress the stderr output from llama_cpp
-        # this is a workaround for the llama_cpp library
-        # which prints a lot of warnings and errors to stderr
-        # when loading the model
-        # this is a temporary solution until the library is fixed
         with AgentPhi4Mini.suppress_native_stderr():
-            lib_dir = os.path.join(os.path.dirname(__file__), "llama_cpp", "lib")
-            self.llm: Llama = Llama(
-                lib_path=os.path.join(lib_dir, "libllama.dylib"),
-                model_path=model_path)
+            lib_path = os.path.join(os.path.dirname(__file__), "llama_cpp", "lib", "libllama.dylib")
+            llama_kwargs = {
+                "model_path": model_path,
+                "n_ctx": 2048,
+                "n_threads": n_threads,
+                "n_batch": 256,
+                "use_mlock": True,
+                "use_mmap": True,
+            }
 
-        self.system_prompt = system_prompt
+            if os.path.exists(lib_path):
+                llama_kwargs["lib_path"] = lib_path
+
+            self.llm: Llama = Llama(**llama_kwargs)
+            print("✨ Model ready!")
 
+        self.system_prompt = system_prompt
+
     def one_shot_mode(self, user_input: str) -> str:
         chat_history = [{"role": "system", "content": self.system_prompt}]
         chat_history.append({"role": "user", "content": user_input})
         full_prompt = self.format_chat(chat_history)
+
         with AgentPhi4Mini.suppress_native_stderr():
-            output_raw = self.llm(prompt=full_prompt, max_tokens=100, temperature=0.2, stream=False)
+            output_raw = self.llm(
+                prompt=full_prompt,
+                max_tokens=100,
+                temperature=0.2,
+                stream=False,
+                top_p=0.1,          # More focused responses
+                repeat_penalty=1.1  # Reduce repetition
+            )
 
-        # unfortuntely llama_cpp has a union type for the output
         output = cast(CreateCompletionResponse, output_raw)
-
-        assistant_reply : str = output["choices"][0]["text"].strip()
-        return assistant_reply
-
+        assistant_reply: str = output["choices"][0]["text"].strip()
+        return assistant_reply
+
     def format_chat(self, messages: List[dict[str, str]]) -> str:
         chat_prompt = ""
         for msg in messages:
diff --git a/src/open_codex/agents/qwen_25_coder.py b/src/open_codex/agents/qwen_25_coder.py
new file mode 100644
index 0000000..0ec8f38
--- /dev/null
+++ b/src/open_codex/agents/qwen_25_coder.py
@@ -0,0 +1,124 @@
+import time
+import os
+import multiprocessing
+from typing import cast, Optional, List
+from llama_cpp import CreateCompletionResponse, Llama
+from open_codex.interfaces.llm_agent import LLMAgent
+import contextlib
+from huggingface_hub import hf_hub_download
+
+class AgentQwen25Coder(LLMAgent):
+    def download_model(self, model_filename: str,
+                       repo_id: str,
+                       local_dir: str,
+                       token: Optional[str] = None) -> str:
+        print(
+            "\n🤖 Welcome to Open Codex!\n"
+            "📦 First run requires downloading the model.\n"
+            "⚡️ This model is tuned for coding tasks.\n"
+        )
+
+        start = time.time()
+        model_path: str = hf_hub_download(
+            repo_id=repo_id,
+            filename=model_filename,
+            local_dir=local_dir,
+            token=token,
+            force_download=True,  # Force download to ensure the latest version
+        )
+        duration = time.time() - start
+        print(f"✅ Model downloaded ({duration:.1f}s)")
+        return model_path
+
+    def __init__(self, system_prompt: str, hf_token: Optional[str] = None):
+        model_filename = "Qwen2.5-Coder-1.5B-Instruct-F16.gguf"
+        repo_id = "unsloth/Qwen2.5-Coder-1.5B-Instruct-GGUF"  # unsloth's GGUF build of Qwen2.5-Coder-1.5B-Instruct
+        local_dir = os.path.expanduser("~/.cache/open-codex")
+        model_path = os.path.join(local_dir, model_filename)
+
+        if not hf_token:
+            hf_token = os.environ.get("HUGGINGFACE_TOKEN")
+
+        if not os.path.exists(model_path):
+            model_path = self.download_model(model_filename, repo_id, local_dir, token=hf_token)
+        else:
+            print("🚀 Loading Qwen model...\n")
+
+        # Get optimal thread count for the system
+        n_threads = min(4, multiprocessing.cpu_count())
+
+        with AgentQwen25Coder.suppress_native_stderr():
+            self.llm: Llama = Llama(
+                model_path=model_path,
+                n_ctx=2048,           # Smaller context for faster responses
+                n_threads=n_threads,  # Use optimal thread count
+                n_batch=256,          # Balanced batch size
+                use_mlock=True,       # Lock memory to prevent swapping
+                use_mmap=True,        # Use memory mapping for faster loading
+            )
+        print("✨ Model ready!")
+
+        self.system_prompt = system_prompt
+
+    def one_shot_mode(self, user_input: str) -> str:
+        # One-turn example keeps the model anchored on returning a bare shell command
+        chat_history = [
+            {"role": "system", "content": self.system_prompt},
+            {"role": "user", "content": "I need a shell command to find all python files"},
+            {"role": "assistant", "content": "find . -name \"*.py\""},
+            {"role": "user", "content": user_input}
+        ]
+        full_prompt = self.format_chat(chat_history)
+
+        with AgentQwen25Coder.suppress_native_stderr():
+            try:
+                output_raw = self.llm(
+                    prompt=full_prompt,
+                    max_tokens=100,      # Limit response length
+                    temperature=0.1,     # Lower temperature for more deterministic output
+                    top_p=0.1,           # Focus on most likely tokens
+                    top_k=10,            # Limit vocabulary for shell commands
+                    repeat_penalty=1.1,  # Prevent repetition
+                    stop=["<|im_end|>", "<|im_start|>", "\n"],  # Stop at chat-template boundaries
+                    stream=False
+                )
+
+                output = cast(CreateCompletionResponse, output_raw)
+                assistant_reply: str = output["choices"][0]["text"].strip()
+
+                # Clean up the response: keep the first line and strip template tokens
+                assistant_reply = assistant_reply.split('\n')[0].strip()
+                assistant_reply = assistant_reply.replace("<|im_end|>", "").strip()
+
+                # Basic sanity check: reject output that still contains chat-template markup
+                if "<|" in assistant_reply or "|>" in assistant_reply:
+                    print("⚠️ Model returned malformed output")
+                    return ""
+
+                return assistant_reply
+
+            except Exception as e:
+                print(f"⚠️ Model error: {str(e)}")
+                return ""
+
+    def format_chat(self, messages: List[dict[str, str]]) -> str:
+        # Qwen2.5 instruct models use the ChatML template:
+        #   <|im_start|>{role}\n{content}<|im_end|>
+        chat_prompt = ""
+        for msg in messages:
+            chat_prompt += f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
+        chat_prompt += "<|im_start|>assistant\n"
+        return chat_prompt
+
+    @staticmethod
+    @contextlib.contextmanager
+    def suppress_native_stderr():
+        """
+        Redirect C-level stderr (fd 2) into /dev/null, so llama.cpp logs vanish.
+        """
+        devnull_fd = os.open(os.devnull, os.O_WRONLY)
+        saved_stderr_fd = os.dup(2)
+        try:
+            os.dup2(devnull_fd, 2)
+            yield
+        finally:
+            os.dup2(saved_stderr_fd, 2)
+            os.close(devnull_fd)
+            os.close(saved_stderr_fd)
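Side note for reviewers (illustrative, not part of the patch): the ChatML framing that `format_chat` produces for Qwen2.5 instruct models looks like the output of the sketch below. The helper name `build_prompt` is hypothetical and simply mirrors the method's logic.

```python
# Mirrors AgentQwen25Coder.format_chat for illustration; not used by the CLI.
def build_prompt(messages: list[dict[str, str]]) -> str:
    prompt = ""
    for msg in messages:
        prompt += f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
    return prompt + "<|im_start|>assistant\n"

print(build_prompt([
    {"role": "system", "content": "Reply with a single shell command."},
    {"role": "user", "content": "list all python files"},
]))
# <|im_start|>system
# Reply with a single shell command.<|im_end|>
# <|im_start|>user
# list all python files<|im_end|>
# <|im_start|>assistant
```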
diff --git a/src/open_codex/main.py b/src/open_codex/main.py
index 73d218c..f389940 100644
--- a/src/open_codex/main.py
+++ b/src/open_codex/main.py
@@ -1,21 +1,38 @@
 import sys
 import argparse
 import subprocess
+import pyperclip
+import shutil
+import os
+from datetime import datetime
+
+from open_codex.agent_builder import AgentBuilder, ModelType
+from open_codex.interfaces.llm_agent import LLMAgent
 
+# ANSI color codes
 GREEN = "\033[92m"
 RED = "\033[91m"
 BLUE = "\033[94m"
+YELLOW = "\033[93m"
+BOLD = "\033[1m"
 RESET = "\033[0m"
 
-# Capture single keypress (terminal) from the user
-# and returns it as a string. It works on both Windows and Unix systems.
+# Get terminal width for better formatting
+TERM_WIDTH = shutil.get_terminal_size().columns
+
+def print_banner(text: str, color: str = BLUE, char: str = "=") -> None:
+    padding = char * ((TERM_WIDTH - len(text) - 2) // 2)
+    print(f"{color}{padding} {text} {padding}{RESET}")
+
+def print_timestamp(prefix: str = "") -> None:
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    print(f"{YELLOW}{prefix}[{timestamp}]{RESET}")
 
-# Windows
+# Capture single keypress
 if sys.platform == "win32":
     import msvcrt
     def get_keypress():
         return msvcrt.getch().decode("utf-8")
-# Unix
 else:
     import termios, tty
     def get_keypress():
@@ -29,60 +46,123 @@ def get_keypress():
         return key
 
 def print_response(command: str):
-    print(f"{BLUE}Command found:\n=====================")
-    print(f"{GREEN}{command}{RESET}")
-    print(f"{BLUE}====================={RESET}")
-    print(f"{BLUE}What do you want to do with this command?{RESET}")
-    print(f"{BLUE}[c] Copy [e] Execute [a] Abort{RESET}")
-    print(f"{BLUE}Press key: ", end="", flush=True)
+    print_banner("Command Found")
+    print(f"{GREEN}{BOLD}{command}{RESET}")
+    print_banner("Options")
+
+    print(f"{BLUE}What would you like to do?{RESET}")
+    print(f"{BOLD}[e]{RESET} Execute command")
+    print(f"{BOLD}[c]{RESET} Copy to clipboard")
+    print(f"{BOLD}[a]{RESET} Abort")
+    print(f"\n{BLUE}Press key: ", end="", flush=True)
 
     choice = get_keypress().lower()
     print(f"{RESET}")
 
     if choice == "e":
-        print(f"{BLUE}Executing command: {command}{RESET}")
-        result = subprocess.run(command, shell=True, capture_output=True, text=True)
-        print(f"{GREEN}Command output: {result.stdout}{RESET}")
-        if result.stderr:
-            print(f"{RED}Error: {result.stderr}{RESET}")
+        print_banner("Executing Command")
+        print_timestamp()
+        print(f"{BLUE}Running: {command}{RESET}\n")
+
+        try:
+            process = subprocess.Popen(
+                command,
+                shell=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                bufsize=1,
+                universal_newlines=True,
+                cwd=os.getcwd(),
+                env=os.environ.copy()
+            )
+
+            # stdout and stderr are read alternately, one line at a time;
+            # output from commands that only write to stderr may appear
+            # only once the command finishes.
+            permission_error = False
+            while True:
+                stdout_line = process.stdout.readline() if process.stdout else ""
+                if stdout_line:
+                    print(f"{GREEN}{stdout_line.rstrip()}{RESET}")
+
+                stderr_line = process.stderr.readline() if process.stderr else ""
+                if stderr_line:
+                    if "Permission denied" in stderr_line:
+                        permission_error = True
+                    print(f"{RED}{stderr_line.rstrip()}{RESET}")
+
+                if process.poll() is not None and not stdout_line and not stderr_line:
+                    break
+
+            if process.returncode == 0:
+                print(f"\n{GREEN}✓ Command completed successfully{RESET}")
+            else:
+                error_msg = "✗ Command failed"
+                if permission_error:
+                    error_msg += " due to permission issues. Try:\n"
+                    error_msg += f"  1. Using sudo (if appropriate)\n"
+                    error_msg += f"  2. Checking file/directory permissions\n"
+                    error_msg += f"  3. Running from a directory you have access to"
+                else:
+                    error_msg += f" with exit code {process.returncode}"
+                print(f"\n{RED}{error_msg}{RESET}")
+
+        except Exception as e:
+            print(f"\n{RED}✗ Error executing command: {str(e)}{RESET}")
+
+        print_timestamp("Finished at ")
+
     elif choice == "c":
-        import pyperclip  # ⏱ lazy import
         pyperclip.copy(command)
-        print(f"{GREEN}Command copied to clipboard!{RESET}")
+        print(f"{GREEN}✓ Command copied to clipboard!{RESET}")
+
     elif choice == "a":
-        print(f"{BLUE}Aborted.{RESET}")
+        print(f"{BLUE}Operation aborted.{RESET}")
     else:
         print(f"{RED}Unknown choice. Nothing happened.{RESET}")
 
-def one_shot_mode(prompt: str):
-    from open_codex.agent_builder import AgentBuilder
-    print(f"{BLUE}Using model: phi-4-mini-instruct{RESET}")
+def one_shot_mode(agent: LLMAgent, model: ModelType, prompt: str):
+    print(f"{BLUE}Using model: {model}{RESET}")
     try:
-        agent = AgentBuilder.get_agent()
         response = agent.one_shot_mode(prompt)
         print_response(response)
     except Exception as e:
         print(f"{RED}Error: {e}{RESET}")
 
 def print_help_message():
+    print_banner("Open Codex - Natural Language to CLI commands")
     print(f"{BLUE}Usage examples:{RESET}")
     print(f"{GREEN}open-codex \"list all files in current directory\"")
-    print(f"{GREEN}open-codex \"find all python files modified in the last week\"")
+    print(f"{GREEN}open-codex --model qwen-2.5-coder --hf-token YOUR_TOKEN \"find python files\"")
    print(f"{GREEN}open-codex \"create a tarball of the src directory\"")
     print()
+    print(f"{BLUE}Available models:{RESET}")
+    print(f"{GREEN}  - phi-4-mini (default)")
+    print(f"{GREEN}  - qwen-2.5-coder (requires Hugging Face authentication)")
+    print()
+    print(f"{BLUE}Authentication:{RESET}")
+    print(f"{GREEN}For qwen-2.5-coder, you can provide your Hugging Face token:")
+    print(f"{GREEN}1. Via environment variable: export HUGGINGFACE_TOKEN=your_token")
+    print(f"{GREEN}2. Via command line: --hf-token your_token")
+    print()
 
 def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("prompt", nargs="*", help="Natural language prompt")
+    parser = argparse.ArgumentParser(description="Open Codex - Natural Language to CLI commands")
+    parser.add_argument("prompt", nargs="*", help="Optional prompt for one-shot mode")
+    parser.add_argument("--model", type=str, choices=["phi-4-mini", "qwen-2.5-coder"],
+                        default="phi-4-mini", help="Choose the model to use")
+    parser.add_argument("--hf-token", type=str, help="Hugging Face API token for authenticated models")
     args = parser.parse_args()
     prompt = " ".join(args.prompt).strip()
 
     if not prompt or prompt == "--help":
         print_help_message()
-        sys.exit(0)
-
-    print(f"{BLUE}Prompt: {prompt}{RESET}", flush=True)
-    one_shot_mode(prompt)
+        sys.exit(1)
+
+    try:
+        agent = AgentBuilder.get_agent(model=args.model, hf_token=args.hf_token)
+        one_shot_mode(agent, args.model, prompt)
+    except ValueError as e:
+        print(f"{RED}Error: {str(e)}{RESET}")
+        sys.exit(1)
 
 if __name__ == "__main__":
     # We call multiprocessing.freeze_support() because we are using PyInstaller to build a frozen binary.
@@ -95,4 +175,4 @@ def main():
     # See: https://pyinstaller.org/en/stable/common-issues-and-pitfalls.html#when-to-call-multiprocessing-freeze-support
     from multiprocessing import freeze_support
     freeze_support()
-    main()
\ No newline at end of file
+    main()