Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor code and add new features #6

Merged
merged 72 commits into from
Mar 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
72 commits
Select commit Hold shift + click to select a range
f4f018b
Refactor CLI module and add cache directory creation
Younis-Ahmed Mar 17, 2024
40ec711
Add OpenAIEmbeddings and recursive_load_files functions
Younis-Ahmed Mar 17, 2024
112e82e
Add enums and lists for file and directory exclusions
Younis-Ahmed Mar 17, 2024
7c8f75a
Add recursive_load_files function to load all files in the git reposi…
Younis-Ahmed Mar 17, 2024
5906ca0
Update function to handle edge cases
Younis-Ahmed Mar 18, 2024
aa7f7b0
Update python version and add new dependencies
Younis-Ahmed Mar 18, 2024
45ca2da
Add Python version check in main function
Younis-Ahmed Mar 18, 2024
6f2a277
add langchain and sibling packages
Younis-Ahmed Mar 18, 2024
a289ddc
Add langchain dependencies
Younis-Ahmed Mar 18, 2024
df4f618
Refactor file loading logic in main function
Younis-Ahmed Mar 18, 2024
ed7a379
Add file_handler module for file I/O operations
Younis-Ahmed Mar 18, 2024
2db9094
Refactor code to improve performance and readability
Younis-Ahmed Mar 18, 2024
52a6ce9
Add language parser and file extension handling
Younis-Ahmed Mar 18, 2024
939d144
Refactor get_langchain_text_splitters function to use a dictionary fo…
Younis-Ahmed Mar 18, 2024
45a7e9e
Refactor file loading and parsing logic in main() function
Younis-Ahmed Mar 18, 2024
aee44f9
Refactor file loading logic in main function
Younis-Ahmed Mar 19, 2024
750fd5a
Update content-hash in poetry.lock
Younis-Ahmed Mar 19, 2024
732bc8d
Add tiktoken dependency
Younis-Ahmed Mar 19, 2024
7831b25
Add language and text splitting functionality
Younis-Ahmed Mar 19, 2024
6af969a
Add tree_parser module
Younis-Ahmed Mar 19, 2024
754176e
Add base class for tree-sitter parsers
Younis-Ahmed Mar 19, 2024
38cc840
Add TreesitterC class for parsing C code using tree-sitter library
Younis-Ahmed Mar 19, 2024
840716d
Add TreesitterCpp class to parse C++ code using the tree-sitter library
Younis-Ahmed Mar 19, 2024
3da8f63
Add get_hash function to retrieve file hash
Younis-Ahmed Mar 20, 2024
09e3c24
Add return type annotation to _query_all_methods and _query_method_na…
Younis-Ahmed Mar 20, 2024
6583070
Refactor code to use parse_code_files function
Younis-Ahmed Mar 20, 2024
4d6d7ad
Add code file parsing functionality
Younis-Ahmed Mar 20, 2024
7ff0ae4
Update language mappings in llm_handler.py
Younis-Ahmed Mar 20, 2024
c8435fe
Add VectorStore class for storing embeddings and metadata
Younis-Ahmed Mar 20, 2024
54cef33
Add VectorCache class and related functions to cache vector data
Younis-Ahmed Mar 21, 2024
869c97a
Add FAISS installation prompt and cache saving
Younis-Ahmed Mar 21, 2024
1836769
Update vec_store.py with vector cache and retrieval functionality
Younis-Ahmed Mar 21, 2024
929c896
Fix bug in login functionality
Younis-Ahmed Mar 21, 2024
1567441
Add ConversationSummaryMemory and ConversationalRetrievalChain imports
Younis-Ahmed Mar 21, 2024
1188b10
Add load_docs method to VectorStore class
Younis-Ahmed Mar 21, 2024
f9b8d9d
Fix Haskell support in get_langchain_language function
Younis-Ahmed Mar 21, 2024
0e36b5e
Refactor vec_store.py to remove unused imports
Younis-Ahmed Mar 21, 2024
d69c6bb
Update CLI to use FaissModel enum values and add chat functionality
Younis-Ahmed Mar 22, 2024
67f15d9
Fix duplicate config
Younis-Ahmed Mar 22, 2024
b9e3f7d
Update configuration handling in conf.py
Younis-Ahmed Mar 23, 2024
b8f6a88
Remove duplicate config in main function
Younis-Ahmed Mar 23, 2024
920c3ae
Add type hints and fix none return type in parse_code_files function
Younis-Ahmed Mar 23, 2024
e60bc23
Update cli.py with ChatOpenAI model and fix print statement
Younis-Ahmed Mar 23, 2024
1f1f776
Exclude too few public methods in pylint configuration file
Younis-Ahmed Mar 23, 2024
2833e0d
Fix OpenAI API key validation
Younis-Ahmed Mar 23, 2024
e57931d
Add type hints and fix none return type in file_handler.py
Younis-Ahmed Mar 23, 2024
c92d912
Add tree parsers for various programming languages
Younis-Ahmed Mar 23, 2024
18140e7
Fix issue with TreesitterMethodNode class and add tree attribute
Younis-Ahmed Mar 23, 2024
21d2cdc
Update import statements and convert repo name to uppercase
Younis-Ahmed Mar 23, 2024
435a9da
Update username to uppercase in config_init() function
Younis-Ahmed Mar 23, 2024
8e9fecf
Fix repo_root variable assignment
Younis-Ahmed Mar 23, 2024
cac8794
Add import statement for PackageNotFoundError
Younis-Ahmed Mar 23, 2024
4fbcf0d
Update pyproject.toml
Younis-Ahmed Mar 24, 2024
06a5c6e
Fix import error and update help message
Younis-Ahmed Mar 24, 2024
2c8c25d
Add faiss_installed function to check if faiss is installed
Younis-Ahmed Mar 24, 2024
9c9e048
Refactor tree parser imports and function names
Younis-Ahmed Mar 24, 2024
d83ca04
Update tree parser imports
Younis-Ahmed Mar 24, 2024
47aa89c
Refactor tree-sitter parser classes and rename TreesitterMethodNode t…
Younis-Ahmed Mar 24, 2024
170d828
Refactor TreesitterC to TreeParserC
Younis-Ahmed Mar 24, 2024
60466fd
Refactor C++ tree parser implementation
Younis-Ahmed Mar 24, 2024
fb57d84
Add TreeParserCsharp class for parsing C# code using tree-sitter library
Younis-Ahmed Mar 24, 2024
ec2c4cb
Add Go tree parser implementation
Younis-Ahmed Mar 24, 2024
5eec04a
Add Java tree parser module
Younis-Ahmed Mar 24, 2024
8a99df0
Add TreeParserJs for JavaScript
Younis-Ahmed Mar 24, 2024
bccbe73
Add TreeParserKotlin class for parsing Kotlin code using tree-sitter …
Younis-Ahmed Mar 24, 2024
5130b41
Add Python code parsing using tree-sitter library
Younis-Ahmed Mar 24, 2024
2176f35
Add Ruby tree parser implementation
Younis-Ahmed Mar 24, 2024
29dd428
Add TreeParseTypescript class for parsing TypeScript code using tree-…
Younis-Ahmed Mar 24, 2024
c38d67f
Add TreeParserRegistry class for managing tree-sitter parsers
Younis-Ahmed Mar 24, 2024
935d61c
Add Rust code parser using tree-sitter library
Younis-Ahmed Mar 24, 2024
448ab0d
Update author email in pyproject.toml
Younis-Ahmed Mar 24, 2024
2f8741a
Add markdown-it-py package and its dependencies
Younis-Ahmed Mar 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ valid-metaclass-classmethod-first-arg=mcs

# List of regular expressions of class ancestor names to ignore when counting
# public methods (see R0903)
exclude-too-few-public-methods=
# exclude-too-few-public-methods=

# List of qualified class names to ignore when counting class parents (see
# R0901)
Expand Down
1,492 changes: 1,478 additions & 14 deletions poetry.lock

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,19 @@ authors = ["Younis <[email protected]>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
python = ">=3.9.1,<3.12"
inquirer = "^3.2.4"
openai = "^1.14.0"
toml = "^0.10.2"
esprima = "^4.0.1"
tree-sitter = "^0.21.0"
tree-sitter-languages = "^1.10.2"
langchain = "^0.1.12"
langchain-community = "^0.0.28"
langchain-text-splitters = "^0.0.1"
langchain-openai = "^0.0.8"
tiktoken = "^0.6.0"
rich = "^13.7.1"


[tool.poetry.group.dev.dependencies]
Expand Down
48 changes: 48 additions & 0 deletions senior_swe_ai/cache.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,39 @@
""" This module contains functions to create and get the cache directory
for the application.
"""
import json
import os
import platform
from pathlib import Path

from altair import Self


class VectorCache:
    """Cache entry linking a source file to its embedding vector ids.

    Holds the filename, the ids of the vectors derived from that file,
    and the commit hash the vectors were built from, so a stale entry
    can be detected when the underlying file changes.
    """

    def __init__(self, filename, vec_ids, commit_hash) -> None:
        # Path of the source file this cache entry describes.
        self.filename = filename
        # Ids of the embedding vectors built from this file.
        self.vector_ids = vec_ids
        # Commit hash the vectors correspond to — TODO confirm this is a
        # git commit hash and not a per-file content hash.
        self.commit_hash = commit_hash

    def to_json(self) -> dict:
        """Return a JSON-serializable dict representation of this entry.

        NOTE: returns a dict, not a JSON string (the original annotation
        said ``str``, which was wrong). ``json.dump(default=...)`` relies
        on receiving a serializable dict here.
        """
        return {
            "filename": self.filename,
            "commit_hash": self.commit_hash,
            "vector_ids": self.vector_ids,
        }

    @classmethod
    def from_json(cls, data: dict) -> "VectorCache":
        """Build a VectorCache from a dict produced by :meth:`to_json`.

        A forward-reference annotation is used instead of ``Self`` because
        ``Self`` was previously imported from ``altair``, which is not a
        legitimate source for that typing name.
        """
        return cls(
            filename=data.get("filename"),
            vec_ids=data.get("vector_ids"),
            commit_hash=data.get("commit_hash"),
        )


def get_cache_path() -> str:
"""Get the cache directory path for the application."""
Expand All @@ -28,3 +57,22 @@ def create_cache_dir() -> None:
if not os.path.exists(get_cache_path()):
path = Path(get_cache_path())
path.mkdir(parents=True, exist_ok=True)


def load_vec_cache(filename: str) -> dict[str, VectorCache]:
    """Load the vector cache mapping from a JSON file in the cache dir.

    Args:
        filename: Name of the cache file inside the application cache
            directory (as returned by ``get_cache_path()``).

    Returns:
        Mapping of cache key to its re-hydrated :class:`VectorCache` entry.

    Raises:
        FileNotFoundError: If the cache file does not exist.
    """
    # Bug fix: the original ignored the ``filename`` argument and opened a
    # hard-coded path; it also concatenated with '/' — os.path.join keeps
    # the path portable across platforms.
    cache_file: str = os.path.join(get_cache_path(), filename)
    with open(cache_file, 'r', encoding='utf-8') as f:
        raw: dict = json.load(f)
    # Re-hydrate each plain-dict entry back into a VectorCache object.
    return {key: VectorCache.from_json(value) for key, value in raw.items()}


def save_vec_cache(vector_cache, filename) -> None:
    """Serialize the vector cache mapping to a JSON file in the cache dir.

    Args:
        vector_cache: Mapping of cache key to :class:`VectorCache` entries
            (any json-dumpable structure containing VectorCache objects).
        filename: Name of the JSON file inside the application cache
            directory (as returned by ``get_cache_path()``).
    """
    # os.path.join keeps the path portable instead of manual '/' concat,
    # consistent with how the cache file is located elsewhere.
    cache_file: str = os.path.join(get_cache_path(), filename)
    with open(cache_file, "w", encoding="utf-8") as vector_cache_file:
        # default=VectorCache.to_json converts each VectorCache object
        # into a plain dict that json can serialize.
        json.dump(vector_cache, fp=vector_cache_file,
                  default=VectorCache.to_json)
106 changes: 101 additions & 5 deletions senior_swe_ai/cli.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
""" SeniorSWE cli tool utilize AI to help you with your project """
from argparse import ArgumentParser, Namespace
import os
import sys
from typing import Any
from senior_swe_ai.git_process import is_git_repo, get_repo_name, get_repo_root
from typing import List
from langchain.memory import ConversationSummaryMemory
from langchain.chains.conversational_retrieval.base import (
BaseConversationalRetrievalChain, ConversationalRetrievalChain
)
import inquirer
from langchain_core.documents.base import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from senior_swe_ai.file_handler import parse_code_files
from senior_swe_ai.git_process import (
is_git_repo, get_repo_name, get_repo_root, recursive_load_files
)
from senior_swe_ai.conf import config_init, load_conf, append_conf
from senior_swe_ai.cache import create_cache_dir, get_cache_path, save_vec_cache
from senior_swe_ai.vec_store import VectorStore
from senior_swe_ai.consts import FaissModel, faiss_installed


def main() -> None:
""" __main__ """
py_version: tuple[int, int] = sys.version_info[:2]
if py_version < (3, 9) or py_version > (3, 12):
print('This app requires Python ^3.9.x or >3.12.x')
sys.exit(1)

parser = ArgumentParser(
description='SeniorSWE cli tool utilize AI to help you with your project'
)

parser.add_argument(
'options', choices=['init', 'chat'],
help="'init': initialize the app. 'chat': chat with the AI"
help="'init': initialize the app. 'chat': chat with desired codebase."
)

args: Namespace = parser.parse_args()
Expand All @@ -28,18 +47,95 @@ def main() -> None:
print('The current directory is not a git repository')
sys.exit(1)

repo_name: str = get_repo_name()
repo_name: str = get_repo_name().upper()
repo_root: str = get_repo_root()

append_conf({'repo_name': repo_name, 'repo_root': repo_root})

try:
conf: dict[Any, Any] = load_conf()
conf: dict[str, str] = load_conf()
except FileNotFoundError:
config_init()
append_conf({'repo_name': repo_name, 'repo_root': repo_root})
conf = load_conf()

create_cache_dir()

embed_mdl = OpenAIEmbeddings(
model=conf['embed_model'], api_key=conf['api_key'])

vec_store = VectorStore(embed_mdl, repo_name)

if not os.path.exists(get_cache_path() + f'/{repo_name}.faiss'):
is_faiss_installed: bool = faiss_installed()
if not is_faiss_installed:
question = [
inquirer.List(
'install',
message='FAISS is not installed. Do you want to install it?',
choices=['Yes', 'No'],
default='Yes'
)
]
answer: dict[str, str] = inquirer.prompt(question)
if answer['install'] == 'Yes':
question = [
inquirer.List(
"faiss-installation",
message="Please select the appropriate option to install FAISS. \
Use gpu if your system supports CUDA",
choices=[
FaissModel.FAISS_CPU.value,
FaissModel.FAISS_GPU.value,
],
default=FaissModel.FAISS_CPU.value,
)
]
answer: dict[str, str] = inquirer.prompt(question)
if answer['faiss-installation'] == 'faiss-cpu':
os.system('pip install faiss-cpu')
else:
os.system('pip install faiss-gpu')
else:
print('FAISS is required for this app to work')
sys.exit(1)
# all desired files in the git repository tree
files: list[str] = recursive_load_files()
docs: List[Document] = parse_code_files(files)
vec_store.idx_docs(docs)
save_vec_cache(vec_store.vec_cache, f'{repo_name}.json')

vec_store.load_docs()
chat_mdl = ChatOpenAI(model=conf['chat_model'], api_key=conf['api_key'], temperature=0.9,
max_tokens=2048)
mem = ConversationSummaryMemory(
llm=chat_mdl, memory_key='chat_history', return_messages=True
)
qa: BaseConversationalRetrievalChain = ConversationalRetrievalChain.from_llm(
chat_mdl, retriever=vec_store.retrieval, memory=mem)

try:
continue_chat = True
while continue_chat:
question: str = input(conf['username'] + ': ')
answer = qa(question)
print(repo_name + ': ' + answer['answer'])

choice: str = (
input(
'[C]ontinue chatting, [R]eset chat history, or [Q]uit? '
).strip().upper()
)
if choice == 'C':
continue
if choice == 'R':
mem.clear()
continue
if choice == 'Q':
continue_chat = False
except KeyboardInterrupt:
print('\n✌')


if __name__ == '__main__':
main()
34 changes: 23 additions & 11 deletions senior_swe_ai/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def config_init() -> None:

while api_validate is False:
api_key: str = getpass.getpass('Enter your OpenAI API key: ')
api_validate: bool = validate_api_key(api_key)
api_validate: bool = validate_api_key(api_key.strip())
if api_validate is False:
print('Invalid API key. Please try again.')

Expand All @@ -71,7 +71,7 @@ def config_init() -> None:

conf: dict[str, str] = {
'api_key': api_key,
'username': get_username(),
'username': get_username().upper(),
'embed_model': answers['embed_model'],
'chat_model': 'gpt-3.5-turbo'
}
Expand All @@ -83,17 +83,19 @@ def validate_api_key(api_key: str) -> bool:

try:
# Make a simple request to the API
client = openai.OpenAI()
openai.api_key = api_key
client = openai.OpenAI(api_key=api_key)
# openai.api_key = api_key
client.embeddings.create(
input="A test request to validate the API key",
model="text-embedding-ada-002"
)
return True
except openai.AuthenticationError:
except openai.AuthenticationError as e:
# If an AuthenticationError is raised, the API key is invalid
print(f"AuthenticationError: {e}")
return False
except openai.OpenAIError:
except openai.OpenAIError as e:
print(f"OpenAIError: {e}")
return False


Expand All @@ -107,16 +109,26 @@ def save_conf(conf) -> None:
def append_conf(conf: dict[Any, Any]) -> None:
""" Append the configuration to the file """
conf_file_path: str = get_config_path()
with open(conf_file_path, 'a', encoding='utf-8') as conf_file:
toml.dump(conf, conf_file)
conf_item: dict[Any, Any] = load_conf()

# Update the existing configuration with the new configuration
conf_item.update(conf)

# Write the updated configuration back to the file
with open(conf_file_path, 'w', encoding='utf-8') as conf_file:
toml.dump(conf_item, conf_file)


def load_conf() -> dict[Any, Any]:
""" Load the configuration from the file """
conf_file_path: str = get_config_path()
with open(conf_file_path, 'r', encoding='utf-8') as conf_file:
conf: dict[Any, Any] = toml.load(conf_file)
return conf
try:
with open(conf_file_path, 'r', encoding='utf-8') as conf_file:
conf: dict[Any, Any] = toml.load(conf_file)
return conf
except FileNotFoundError as e:
raise FileNotFoundError(
'Configuration file not found, `init` the app first.') from e


def get_username() -> str:
Expand Down
Loading