From b91d2fe91eb6f616d9d626ef41682d2003bb155d Mon Sep 17 00:00:00 2001 From: Tobias Wasner Date: Thu, 30 Jan 2025 18:12:57 +0100 Subject: [PATCH 01/15] Add solution to inconsistency pipeline & do prompt per template file --- app/pipeline/inconsistency_check_pipeline.py | 39 +++++++++++++------ .../prompts/inconsistency_check_prompts.py | 8 +++- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index d7f08045..d6d7ffa0 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -54,18 +54,33 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): logger.info("Running inconsistency check pipeline...") self.callback.in_progress() - template_repository = "\n".join( - f"\n{file_content}" - for file_path, file_content in dto.exercise.template_repository.items() - ) + overall_response: str = "" - response: str = self.pipeline.invoke( - { - "problem_statement": dto.exercise.problem_statement, - "template_repository": template_repository, - } - ) + for template_file_path, template_file_content in dto.exercise.template_repository.items(): + template_repository = "\n".join( + f"\n{template_file_content}" + + ) + + if template_file_path in dto.exercise.solution_repository.keys(): + solution_file_content = dto.exercise.solution_repository[template_file_path] + solution_repository = f"\n{solution_file_content}" + else: + solution_repository = "\n".join( + f"\n{file_content}" + for file_path, file_content in dto.exercise.solution_repository.items() + ) + logging.warning(f"Solution file for {template_file_path} not found, using all solution files: {dto.exercise.solution_repository.keys().join(', ')}") + + response: str = self.pipeline.invoke( + { + "problem_statement": dto.exercise.problem_statement, + "solution_repository": solution_repository, + "template_repository": template_repository, + } + ) - self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) + overall_response += ('\n' if overall_response else '') + response + self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) - self.callback.done(final_result=response, tokens=self.tokens) + self.callback.done(final_result=overall_response, tokens=self.tokens) diff --git a/app/pipeline/prompts/inconsistency_check_prompts.py b/app/pipeline/prompts/inconsistency_check_prompts.py index 02242f60..f2904fc5 100644 --- a/app/pipeline/prompts/inconsistency_check_prompts.py +++ b/app/pipeline/prompts/inconsistency_check_prompts.py @@ -2,7 +2,7 @@ As detail-oriented expert, find inconsistencies between the provided problem statement and the template repository of \ a programming exercise. -The student will use the the template repository to write code that solves the problem statement. +The student will use the the template repository as a framework to write code that solves the problem statement, it might be missing implementations which the students have to do. You are also provided with the solution repository, which should contain the full and correct implementation of the exercise. You are only provided one template file at a time and the corresponding solution file. Checks: - Given the problem statement, identify any missing or incorrect information in the template repository. @@ -23,6 +23,10 @@ {template_repository} + +{solution_repository} + + Be smart about it, give a structured and actionable response that an instructor can use to significantly improve the \ exercise. Clearly state where the inconsistency lies. Do not make up inconsistencies just to have something to say. @@ -30,5 +34,7 @@ will be confused and frustrated. This is a high stakes exam, so we need to be very thorough. You will be legally responsible for the quality of the exercise, so make sure you do the absolute best job possible, \ otherwise you will be held accountable in the court of law. Do not quote whole files! 🔫 +Do not mention that the template repository is missing requirements mentioned in the problem statement, that is not an inconsistency. The students are supposed to implement those parts, without any guidance such as comments or partial code. +Do only list the inconsistencies (unnumbered), no need to mention something before or after the list of inconsistencies. """ From d9c8fb5f56ba0e15c1ff7f3037acfbd4f9be88cb Mon Sep 17 00:00:00 2001 From: Tobias Wasner Date: Thu, 30 Jan 2025 19:32:54 +0100 Subject: [PATCH 02/15] Add classification to inconsistency check --- .../prompts/inconsistency_check_prompts.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/app/pipeline/prompts/inconsistency_check_prompts.py b/app/pipeline/prompts/inconsistency_check_prompts.py index f2904fc5..c8d472e9 100644 --- a/app/pipeline/prompts/inconsistency_check_prompts.py +++ b/app/pipeline/prompts/inconsistency_check_prompts.py @@ -1,15 +1,15 @@ basic_prompt = """\ -As detail-oriented expert, find inconsistencies between the provided problem statement and the template repository of \ +As detail-oriented expert, find inconsistencies between the provided problem statement and a template file of \ a programming exercise. -The student will use the the template repository as a framework to write code that solves the problem statement, it might be missing implementations which the students have to do. You are also provided with the solution repository, which should contain the full and correct implementation of the exercise. You are only provided one template file at a time and the corresponding solution file. +The student will use multiple template files to write code that solves the problem statement. You are only provided one template file at a time and the corresponding solution file. Checks: -- Given the problem statement, identify any missing or incorrect information in the template repository. -- Given the template repository, identify any missing or incorrect information in the problem statement. -- Ensure that the theme of the problem statement is consistent with the template repository. +- Given the problem statement, identify any missing or incorrect information in the template file. (TEMPLATE_MISSING_INFO) +- Given the template file, identify any missing or incorrect information in the problem statement. (PROBLEM_SATEMENT_MISSING_INFO) +- Ensure that the theme of the problem statement is consistent with the template files. (THEME_INCONSISTENCY) - Ensure that the problem statement is clear and concise and it covers everything that the student needs to know in \ -order to solve the exercise. +order to solve the exercise. (PROBLEM_STATEMENT_CLARITY) It is not an inconsistency, if the problem statement clearly states that the student is responsible for writing a \ specific part of the code. @@ -19,13 +19,13 @@ {problem_statement} - + - + {solution_repository} - + Be smart about it, give a structured and actionable response that an instructor can use to significantly improve the \ @@ -34,7 +34,5 @@ will be confused and frustrated. This is a high stakes exam, so we need to be very thorough. You will be legally responsible for the quality of the exercise, so make sure you do the absolute best job possible, \ otherwise you will be held accountable in the court of law. Do not quote whole files! 🔫 -Do not mention that the template repository is missing requirements mentioned in the problem statement, that is not an inconsistency. The students are supposed to implement those parts, without any guidance such as comments or partial code. -Do only list the inconsistencies (unnumbered), no need to mention something before or after the list of inconsistencies. """ From 3fc2f055f055cea13ebe531b3fb65c62475c94f1 Mon Sep 17 00:00:00 2001 From: Tobias Wasner Date: Thu, 30 Jan 2025 19:33:50 +0100 Subject: [PATCH 03/15] Reformat --- app/pipeline/inconsistency_check_pipeline.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index d6d7ffa0..d92567f4 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -56,21 +56,27 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): overall_response: str = "" - for template_file_path, template_file_content in dto.exercise.template_repository.items(): + for ( + template_file_path, + template_file_content, + ) in dto.exercise.template_repository.items(): template_repository = "\n".join( f"\n{template_file_content}" - ) if template_file_path in dto.exercise.solution_repository.keys(): - solution_file_content = dto.exercise.solution_repository[template_file_path] + solution_file_content = dto.exercise.solution_repository[ + template_file_path + ] solution_repository = f"\n{solution_file_content}" else: solution_repository = "\n".join( f"\n{file_content}" for file_path, file_content in dto.exercise.solution_repository.items() ) - logging.warning(f"Solution file for {template_file_path} not found, using all solution files: {dto.exercise.solution_repository.keys().join(', ')}") + logging.warning( + f"Solution file for {template_file_path} not found, using all solution files: {dto.exercise.solution_repository.keys().join(', ')}" + ) response: str = self.pipeline.invoke( { @@ -80,7 +86,7 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): } ) - overall_response += ('\n' if overall_response else '') + response + overall_response += ("\n" if overall_response else "") + response self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) self.callback.done(final_result=overall_response, tokens=self.tokens) From 8e62840d266e7f43aad3f002a999662c369fe86b Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Fri, 31 Jan 2025 22:51:53 +0100 Subject: [PATCH 04/15] wtf --- app/pipeline/inconsistency_check_pipeline.py | 137 +++++++++++++----- .../prompts/inconsistency_check_prompts.py | 31 +++- 2 files changed, 129 insertions(+), 39 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index d92567f4..79f99b50 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -1,10 +1,9 @@ import logging -from typing import Optional +from typing import Dict, List, Literal, Optional -from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import PromptTemplate -from langchain_core.runnables import Runnable from langsmith import traceable +from pydantic import BaseModel, Field from app.common.PipelineEnum import PipelineEnum from app.domain import InconsistencyCheckPipelineExecutionDTO @@ -12,20 +11,80 @@ from app.llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel from app.pipeline import Pipeline from app.web.status.status_update import InconsistencyCheckCallback -from app.pipeline.prompts.inconsistency_check_prompts import basic_prompt +from app.pipeline.prompts.inconsistency_check_prompts import solver_prompt, scorer_prompt logger = logging.getLogger(__name__) +IssueLocation = Literal["problem_statement", "template_file", "solution_file", "uncertain"] + + +class ConsistencyIssue(BaseModel): + """The consistency issue found in the programming exercise.""" + + location: IssueLocation = Field( + description="The location of the consistency issue in the programming exercise." + ) + + title: str = Field( + description="The title of the consistency issue found in the programming exercise." + ) + + description: str = Field( + description="The description of the consistency issue found in the programming exercise." + ) + + suggestion: str = Field( + description="The suggestion to fix the consistency issue found in the programming exercise." + ) + + +class GuessConsistencyIssues(BaseModel): + """Submit multiple consistency issues found in the programming exercise as guesses.""" + + reasoning: str = Field( + description="The reasoning behind the submitted guesses. Explain how you arrived at these consistency issues." + ) + + issues: List[ConsistencyIssue] = Field( + description="The list of consistency issues found in the programming exercise." + ) + + +class ScoredConsistencyIssue(BaseModel): + """The scored consistency issue found in the programming exercise.""" + + candidate: ConsistencyIssue = Field( + description="The candidate consistency issue under review." + ) + + score_reasoning: str = Field( + description="The reasoning behind the score given to the consistency issue. Weighting factors should be explained." + ) + + score: float = Field( + description="The score given to the consistency issue. The score should be between 0.00 and 1.00. The higher the score, the more severe the issue, lower scores are nitpicks." + ) + +class ScoredConsistencyIssues(BaseModel): + """Submit multiple scored consistency issues found in the programming exercise.""" + + issues: List[ScoredConsistencyIssue] = Field( + description="The list of scored consistency issues found in the programming exercise." + ) + + class InconsistencyCheckPipeline(Pipeline): - pipeline: Runnable - llm: IrisLangchainChatModel + solver_llm: IrisLangchainChatModel + scorer_llm: IrisLangchainChatModel callback: InconsistencyCheckCallback def __init__(self, callback: Optional[InconsistencyCheckCallback] = None): super().__init__(implementation_id="inconsistency_check_pipeline") completion_args = CompletionArguments(temperature=0, max_tokens=2000) - self.llm = IrisLangchainChatModel( + + self.solver_prompt = PromptTemplate.from_template(solver_prompt) + self.solver_llm = IrisLangchainChatModel( request_handler=CapabilityRequestHandler( requirements=RequirementList( gpt_version_equivalent=4.5, @@ -33,12 +92,25 @@ def __init__(self, callback: Optional[InconsistencyCheckCallback] = None): ) ), completion_args=completion_args, - ) - self.prompt = PromptTemplate.from_template(basic_prompt) - self.pipeline = self.prompt | self.llm | StrOutputParser() + ).with_structured_output(GuessConsistencyIssues) + self.solver = self.solver_prompt | self.solver_llm + + self.scorer_prompt = PromptTemplate.from_template(scorer_prompt) + self.scorer_llm = IrisLangchainChatModel( + request_handler=CapabilityRequestHandler( + requirements=RequirementList( + gpt_version_equivalent=4.5, + context_length=16385, + ) + ), + completion_args=completion_args, + ).with_structured_output(ScoredConsistencyIssues) + self.scorer = self.scorer_prompt | self.scorer_llm + self.callback = callback self.tokens = [] + @traceable(name="Inconsistency Check Pipeline") def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): """ @@ -54,39 +126,28 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): logger.info("Running inconsistency check pipeline...") self.callback.in_progress() - overall_response: str = "" + consistency_issues: Dict[str, List[ConsistencyIssue]] = {} - for ( - template_file_path, - template_file_content, - ) in dto.exercise.template_repository.items(): - template_repository = "\n".join( - f"\n{template_file_content}" - ) + file_paths = set(dto.exercise.template_repository.keys()) | set(dto.exercise.solution_repository.keys()) + for file_path in file_paths: + template_file = dto.exercise.template_repository.get(file_path, "empty file") + solution_file = dto.exercise.solution_repository.get(file_path, "empty file") - if template_file_path in dto.exercise.solution_repository.keys(): - solution_file_content = dto.exercise.solution_repository[ - template_file_path - ] - solution_repository = f"\n{solution_file_content}" - else: - solution_repository = "\n".join( - f"\n{file_content}" - for file_path, file_content in dto.exercise.solution_repository.items() - ) - logging.warning( - f"Solution file for {template_file_path} not found, using all solution files: {dto.exercise.solution_repository.keys().join(', ')}" - ) - - response: str = self.pipeline.invoke( + response: GuessConsistencyIssues = self.solver.invoke( { "problem_statement": dto.exercise.problem_statement, - "solution_repository": solution_repository, - "template_repository": template_repository, + "file_path": file_path, + "template_file": template_file, + "solution_file": solution_file, } ) - overall_response += ("\n" if overall_response else "") + response - self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) + logger.info(f"Consistency issues found in {file_path}: {response}") + # consistency_issues[file_path] = response.issues + + + self._append_tokens(self.solver_llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) + self._append_tokens(self.scorer_llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) - self.callback.done(final_result=overall_response, tokens=self.tokens) + #TODO + self.callback.done(final_result="", tokens=self.tokens) diff --git a/app/pipeline/prompts/inconsistency_check_prompts.py b/app/pipeline/prompts/inconsistency_check_prompts.py index c8d472e9..8b321143 100644 --- a/app/pipeline/prompts/inconsistency_check_prompts.py +++ b/app/pipeline/prompts/inconsistency_check_prompts.py @@ -1,4 +1,33 @@ -basic_prompt = """\ +solver_prompt = """\ + +You are a detail-oriented expert instructor at an Ivy League university ensuring the quality of programming exercises. \ +Your task is to find consistency issues as part of the exercise creation process to make sure that the exercise is \ +without any errors or inconsistencies that might confuse students. Your teaching assistants will use your feedback to \ +improve the exercise. + +Parts of a programming exercise: + - Problem statement: The description of the exercise containing tasks that the student needs to solve. + - Template repository: The starting point from which the student will start solving the exercise. + - Solution repository: The sample solution set by the instructor to compare the student's solution against. + +To not overburden you, you will be provided with the problem statement and one of the template plus solution files \ +at a time. You need to compare the problem statement with the template file and identify any consistency issues. + + + +{problem_statement} + + + +{template_file} + + + +{solution_file} + +""" + +scorer_prompt = """\ As detail-oriented expert, find inconsistencies between the provided problem statement and a template file of \ a programming exercise. From f556e1b2b2752fada6480d937143e2ac2ed0545d Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Fri, 31 Jan 2025 23:29:15 +0100 Subject: [PATCH 05/15] simplify --- app/pipeline/inconsistency_check_pipeline.py | 71 +++++++------------ .../prompts/inconsistency_check_prompts.py | 39 ---------- 2 files changed, 25 insertions(+), 85 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index 79f99b50..fc87f81f 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -11,7 +11,7 @@ from app.llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel from app.pipeline import Pipeline from app.web.status.status_update import InconsistencyCheckCallback -from app.pipeline.prompts.inconsistency_check_prompts import solver_prompt, scorer_prompt +from app.pipeline.prompts.inconsistency_check_prompts import solver_prompt logger = logging.getLogger(__name__) @@ -51,32 +51,8 @@ class GuessConsistencyIssues(BaseModel): ) -class ScoredConsistencyIssue(BaseModel): - """The scored consistency issue found in the programming exercise.""" - - candidate: ConsistencyIssue = Field( - description="The candidate consistency issue under review." - ) - - score_reasoning: str = Field( - description="The reasoning behind the score given to the consistency issue. Weighting factors should be explained." - ) - - score: float = Field( - description="The score given to the consistency issue. The score should be between 0.00 and 1.00. The higher the score, the more severe the issue, lower scores are nitpicks." - ) - -class ScoredConsistencyIssues(BaseModel): - """Submit multiple scored consistency issues found in the programming exercise.""" - - issues: List[ScoredConsistencyIssue] = Field( - description="The list of scored consistency issues found in the programming exercise." - ) - - class InconsistencyCheckPipeline(Pipeline): solver_llm: IrisLangchainChatModel - scorer_llm: IrisLangchainChatModel callback: InconsistencyCheckCallback def __init__(self, callback: Optional[InconsistencyCheckCallback] = None): @@ -95,18 +71,6 @@ def __init__(self, callback: Optional[InconsistencyCheckCallback] = None): ).with_structured_output(GuessConsistencyIssues) self.solver = self.solver_prompt | self.solver_llm - self.scorer_prompt = PromptTemplate.from_template(scorer_prompt) - self.scorer_llm = IrisLangchainChatModel( - request_handler=CapabilityRequestHandler( - requirements=RequirementList( - gpt_version_equivalent=4.5, - context_length=16385, - ) - ), - completion_args=completion_args, - ).with_structured_output(ScoredConsistencyIssues) - self.scorer = self.scorer_prompt | self.scorer_llm - self.callback = callback self.tokens = [] @@ -129,6 +93,7 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): consistency_issues: Dict[str, List[ConsistencyIssue]] = {} file_paths = set(dto.exercise.template_repository.keys()) | set(dto.exercise.solution_repository.keys()) + for file_path in file_paths: template_file = dto.exercise.template_repository.get(file_path, "empty file") solution_file = dto.exercise.solution_repository.get(file_path, "empty file") @@ -142,12 +107,26 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): } ) - logger.info(f"Consistency issues found in {file_path}: {response}") - # consistency_issues[file_path] = response.issues - - - self._append_tokens(self.solver_llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) - self._append_tokens(self.scorer_llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) - - #TODO - self.callback.done(final_result="", tokens=self.tokens) + if response: + consistency_issues[file_path] = response.issues + else: + logger.error(f"Failed to parse response for {file_path}, skipping...") + + # I'm not fixing this + # self._append_tokens(self.solver_llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) + # self._append_tokens(self.scorer_llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) + + final_result = '' + for file_path, issues in consistency_issues.items(): + if not issues: + continue + + if final_result: + final_result += '\n' + final_result += f"### File: `{file_path}`\n" + for issue in issues: + final_result += f"#### {issue.title}\n" + final_result += f"**Description**:\n{issue.description}\n" + final_result += f"**Suggestion**:\n{issue.suggestion}\n" + + self.callback.done(final_result=final_result, tokens=self.tokens) diff --git a/app/pipeline/prompts/inconsistency_check_prompts.py b/app/pipeline/prompts/inconsistency_check_prompts.py index 8b321143..186f7884 100644 --- a/app/pipeline/prompts/inconsistency_check_prompts.py +++ b/app/pipeline/prompts/inconsistency_check_prompts.py @@ -26,42 +26,3 @@ {solution_file} """ - -scorer_prompt = """\ - -As detail-oriented expert, find inconsistencies between the provided problem statement and a template file of \ -a programming exercise. -The student will use multiple template files to write code that solves the problem statement. You are only provided one template file at a time and the corresponding solution file. - -Checks: -- Given the problem statement, identify any missing or incorrect information in the template file. (TEMPLATE_MISSING_INFO) -- Given the template file, identify any missing or incorrect information in the problem statement. (PROBLEM_SATEMENT_MISSING_INFO) -- Ensure that the theme of the problem statement is consistent with the template files. (THEME_INCONSISTENCY) -- Ensure that the problem statement is clear and concise and it covers everything that the student needs to know in \ -order to solve the exercise. (PROBLEM_STATEMENT_CLARITY) - -It is not an inconsistency, if the problem statement clearly states that the student is responsible for writing a \ -specific part of the code. - - - -{problem_statement} - - - - - -{solution_repository} - - - -Be smart about it, give a structured and actionable response that an instructor can use to significantly improve the \ -exercise. Clearly state where the inconsistency lies. Do not make up inconsistencies just to have something to say. -It needs to be very comprehensive and detailed, imagine some inconsistencies slipped through, students in the exam \ -will be confused and frustrated. This is a high stakes exam, so we need to be very thorough. -You will be legally responsible for the quality of the exercise, so make sure you do the absolute best job possible, \ -otherwise you will be held accountable in the court of law. Do not quote whole files! 🔫 - -""" From b31247a17fae0f3cf7ec255ca32a33db8a5cbf6d Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 3 Feb 2025 09:32:37 +0100 Subject: [PATCH 06/15] remove structured output parsing and add summary step --- app/pipeline/inconsistency_check_pipeline.py | 101 ++++++------------ .../prompts/inconsistency_check_prompts.py | 34 ++++++ 2 files changed, 68 insertions(+), 67 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index fc87f81f..37aee7f2 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -1,9 +1,9 @@ import logging -from typing import Dict, List, Literal, Optional +from typing import Dict, Optional +from langchain_core.runnables import Runnable from langchain_core.prompts import PromptTemplate from langsmith import traceable -from pydantic import BaseModel, Field from app.common.PipelineEnum import PipelineEnum from app.domain import InconsistencyCheckPipelineExecutionDTO @@ -11,56 +11,23 @@ from app.llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel from app.pipeline import Pipeline from app.web.status.status_update import InconsistencyCheckCallback -from app.pipeline.prompts.inconsistency_check_prompts import solver_prompt +from app.pipeline.prompts.inconsistency_check_prompts import solver_prompt, prettify_prompt logger = logging.getLogger(__name__) -IssueLocation = Literal["problem_statement", "template_file", "solution_file", "uncertain"] - - -class ConsistencyIssue(BaseModel): - """The consistency issue found in the programming exercise.""" - - location: IssueLocation = Field( - description="The location of the consistency issue in the programming exercise." - ) - - title: str = Field( - description="The title of the consistency issue found in the programming exercise." - ) - - description: str = Field( - description="The description of the consistency issue found in the programming exercise." - ) - - suggestion: str = Field( - description="The suggestion to fix the consistency issue found in the programming exercise." - ) - - -class GuessConsistencyIssues(BaseModel): - """Submit multiple consistency issues found in the programming exercise as guesses.""" - - reasoning: str = Field( - description="The reasoning behind the submitted guesses. Explain how you arrived at these consistency issues." - ) - - issues: List[ConsistencyIssue] = Field( - description="The list of consistency issues found in the programming exercise." - ) - - class InconsistencyCheckPipeline(Pipeline): - solver_llm: IrisLangchainChatModel + llm: IrisLangchainChatModel callback: InconsistencyCheckCallback + + solver: Runnable + prettify: Runnable def __init__(self, callback: Optional[InconsistencyCheckCallback] = None): super().__init__(implementation_id="inconsistency_check_pipeline") completion_args = CompletionArguments(temperature=0, max_tokens=2000) - self.solver_prompt = PromptTemplate.from_template(solver_prompt) - self.solver_llm = IrisLangchainChatModel( + self.llm = IrisLangchainChatModel( request_handler=CapabilityRequestHandler( requirements=RequirementList( gpt_version_equivalent=4.5, @@ -68,8 +35,12 @@ def __init__(self, callback: Optional[InconsistencyCheckCallback] = None): ) ), completion_args=completion_args, - ).with_structured_output(GuessConsistencyIssues) - self.solver = self.solver_prompt | self.solver_llm + ) + self.solver_prompt = PromptTemplate.from_template(solver_prompt) + self.solver = self.solver_prompt | self.llm + + self.prettify_prompt = PromptTemplate.from_template(prettify_prompt) + self.prettify = self.prettify_prompt | self.llm self.callback = callback self.tokens = [] @@ -90,7 +61,7 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): logger.info("Running inconsistency check pipeline...") self.callback.in_progress() - consistency_issues: Dict[str, List[ConsistencyIssue]] = {} + consistency_issues: Dict[str, str] = {} file_paths = set(dto.exercise.template_repository.keys()) | set(dto.exercise.solution_repository.keys()) @@ -98,7 +69,7 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): template_file = dto.exercise.template_repository.get(file_path, "empty file") solution_file = dto.exercise.solution_repository.get(file_path, "empty file") - response: GuessConsistencyIssues = self.solver.invoke( + response = self.solver.invoke( { "problem_statement": dto.exercise.problem_statement, "file_path": file_path, @@ -107,26 +78,22 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): } ) - if response: - consistency_issues[file_path] = response.issues - else: - logger.error(f"Failed to parse response for {file_path}, skipping...") + consistency_issues[file_path] = response.content + + formatted_consistency_issues = '\n'.join([ + f"\n{issues}\n" + for file_path, issues + in consistency_issues.items() + ]) - # I'm not fixing this - # self._append_tokens(self.solver_llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) - # self._append_tokens(self.scorer_llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) - - final_result = '' - for file_path, issues in consistency_issues.items(): - if not issues: - continue - - if final_result: - final_result += '\n' - final_result += f"### File: `{file_path}`\n" - for issue in issues: - final_result += f"#### {issue.title}\n" - final_result += f"**Description**:\n{issue.description}\n" - final_result += f"**Suggestion**:\n{issue.suggestion}\n" - - self.callback.done(final_result=final_result, tokens=self.tokens) + final_response = self.prettify.invoke( + { + "problem_statement": dto.exercise.problem_statement, + "consistency_issues": formatted_consistency_issues, + } + ) + + logger.info(final_response.content) + + self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) + self.callback.done(final_result=final_response.content, tokens=self.tokens) diff --git a/app/pipeline/prompts/inconsistency_check_prompts.py b/app/pipeline/prompts/inconsistency_check_prompts.py index 186f7884..a9fb26b5 100644 --- a/app/pipeline/prompts/inconsistency_check_prompts.py +++ b/app/pipeline/prompts/inconsistency_check_prompts.py @@ -25,4 +25,38 @@ {solution_file} + + +Respond with any potential consistency issues found in the exercise formatted in markdown. \ +Just provide the easily digestible formatted markdown without other explanations. It is fine to provide no issues if +you are confident that the files are consistent. + +""" + +prettify_prompt = """\ + +You are a detail-oriented expert instructor at an Ivy League university ensuring the quality of programming exercises. \ +Your task is to find consistency issues as part of the exercise creation process to make sure that the exercise is \ +without any errors or inconsistencies that might confuse students. +In a previous step you already found potential consistency issues as part of the exercise creation process on a file \ +level. Now, you need to summarize the issues found in the exercise so the teaching assistants can fix them. + +Parts of a programming exercise: + - Problem statement: The description of the exercise containing tasks that the student needs to solve. + - Template repository: The starting point from which the student will start solving the exercise. + - Solution repository: The sample solution set by the instructor to compare the student's solution against. + + + +{problem_statement} + + + +{consistency_issues} + + + +Respond with a summary of the consistency issues found in the exercise. \ +Just provide the easily digestible formatted markdown without other explanations. + """ From d6d3beb442f7bae1ac4ca6447c3633bf1803cbbd Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 3 Feb 2025 09:48:55 +0100 Subject: [PATCH 07/15] improve llm handling --- app/pipeline/inconsistency_check_pipeline.py | 50 ++++++++++++-------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index 37aee7f2..84702f4f 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -1,7 +1,7 @@ import logging from typing import Dict, Optional -from langchain_core.runnables import Runnable +from langchain_core.runnables import Runnable, RunnableParallel from langchain_core.prompts import PromptTemplate from langsmith import traceable @@ -61,39 +61,47 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): logger.info("Running inconsistency check pipeline...") self.callback.in_progress() + # First, for each file in the exercise, we will check for consistency issues via the solver pipeline consistency_issues: Dict[str, str] = {} - file_paths = set(dto.exercise.template_repository.keys()) | set(dto.exercise.solution_repository.keys()) + solver_inputs = [ + { + "file_path": file_path, + "problem_statement": dto.exercise.problem_statement, + "template_file": dto.exercise.template_repository.get(file_path, "empty file"), + "solution_file": dto.exercise.solution_repository.get(file_path, "empty file"), + } + for file_path in file_paths + ] + file_responses = self.solver.map().invoke(solver_inputs) + consistency_issues = { + file_path: response.content + for file_path, response in zip(file_paths, file_responses) + } - for file_path in file_paths: - template_file = dto.exercise.template_repository.get(file_path, "empty file") - solution_file = dto.exercise.solution_repository.get(file_path, "empty file") - - response = self.solver.invoke( - { - "problem_statement": dto.exercise.problem_statement, - "file_path": file_path, - "template_file": template_file, - "solution_file": solution_file, - } - ) - - consistency_issues[file_path] = response.content - + # Second, we will prettify the consistency issues and provide a summary using the prettify pipeline formatted_consistency_issues = '\n'.join([ f"\n{issues}\n" for file_path, issues in consistency_issues.items() ]) - - final_response = self.prettify.invoke( + summary_response = self.prettify.invoke( { "problem_statement": dto.exercise.problem_statement, "consistency_issues": formatted_consistency_issues, } ) - logger.info(final_response.content) + result = summary_response.content.strip() + + # remove ``` from start and end if exists + if result.startswith("```") and result.endswith("```"): + result = result[3:-3] + if result.startswith("markdown"): + result = result[8:] + result = result.strip() + + logger.info(result) self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) - self.callback.done(final_result=final_response.content, tokens=self.tokens) + self.callback.done(final_result=result, tokens=self.tokens) From 47e5762484a563be0bed3cd5a0f103dfb5e9039c Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 3 Feb 2025 09:53:31 +0100 Subject: [PATCH 08/15] black --- app/pipeline/inconsistency_check_pipeline.py | 39 ++++++++++++-------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index 84702f4f..912c2c03 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -11,7 +11,10 @@ from app.llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel from app.pipeline import Pipeline from app.web.status.status_update import InconsistencyCheckCallback -from app.pipeline.prompts.inconsistency_check_prompts import solver_prompt, prettify_prompt +from app.pipeline.prompts.inconsistency_check_prompts import ( + solver_prompt, + prettify_prompt, +) logger = logging.getLogger(__name__) @@ -19,7 +22,7 @@ class InconsistencyCheckPipeline(Pipeline): llm: IrisLangchainChatModel callback: InconsistencyCheckCallback - + solver: Runnable prettify: Runnable @@ -38,14 +41,13 @@ def __init__(self, callback: Optional[InconsistencyCheckCallback] = None): ) self.solver_prompt = PromptTemplate.from_template(solver_prompt) self.solver = self.solver_prompt | self.llm - + self.prettify_prompt = PromptTemplate.from_template(prettify_prompt) self.prettify = self.prettify_prompt | self.llm self.callback = callback self.tokens = [] - @traceable(name="Inconsistency Check Pipeline") def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): """ @@ -63,13 +65,19 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): # First, for each file in the exercise, we will check for consistency issues via the solver pipeline consistency_issues: Dict[str, str] = {} - file_paths = set(dto.exercise.template_repository.keys()) | set(dto.exercise.solution_repository.keys()) + file_paths = set(dto.exercise.template_repository.keys()) | set( + dto.exercise.solution_repository.keys() + ) solver_inputs = [ { "file_path": file_path, "problem_statement": dto.exercise.problem_statement, - "template_file": dto.exercise.template_repository.get(file_path, "empty file"), - "solution_file": dto.exercise.solution_repository.get(file_path, "empty file"), + "template_file": dto.exercise.template_repository.get( + file_path, "empty file" + ), + "solution_file": dto.exercise.solution_repository.get( + file_path, "empty file" + ), } for file_path in file_paths ] @@ -78,22 +86,23 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): file_path: response.content for file_path, response in zip(file_paths, file_responses) } - + # Second, we will prettify the consistency issues and provide a summary using the prettify pipeline - formatted_consistency_issues = '\n'.join([ - f"\n{issues}\n" - for file_path, issues - in consistency_issues.items() - ]) + formatted_consistency_issues = "\n".join( + [ + f"\n{issues}\n" + for file_path, issues in consistency_issues.items() + ] + ) summary_response = self.prettify.invoke( { "problem_statement": dto.exercise.problem_statement, "consistency_issues": formatted_consistency_issues, } ) - + result = summary_response.content.strip() - + # remove ``` from start and end if exists if result.startswith("```") and result.endswith("```"): result = result[3:-3] From 8f3020a63ab10a7c69a3e55f64b2e8271d48784f Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 3 Feb 2025 09:56:28 +0100 Subject: [PATCH 09/15] flake --- app/pipeline/inconsistency_check_pipeline.py | 2 +- app/pipeline/prompts/inconsistency_check_prompts.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index 912c2c03..36ce026d 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -1,7 +1,7 @@ import logging from typing import Dict, Optional -from langchain_core.runnables import Runnable, RunnableParallel +from langchain_core.runnables import Runnable from langchain_core.prompts import PromptTemplate from langsmith import traceable diff --git a/app/pipeline/prompts/inconsistency_check_prompts.py b/app/pipeline/prompts/inconsistency_check_prompts.py index a9fb26b5..74e38162 100644 --- a/app/pipeline/prompts/inconsistency_check_prompts.py +++ b/app/pipeline/prompts/inconsistency_check_prompts.py @@ -28,7 +28,7 @@ Respond with any potential consistency issues found in the exercise formatted in markdown. \ -Just provide the easily digestible formatted markdown without other explanations. It is fine to provide no issues if +Just provide the easily digestible formatted markdown without other explanations. It is fine to provide no issues if \ you are confident that the files are consistent. """ From 489f82a816144e43622b254daae667427234e6d8 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 3 Feb 2025 10:00:22 +0100 Subject: [PATCH 10/15] remove log --- app/pipeline/inconsistency_check_pipeline.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index 36ce026d..9c684cfc 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -110,7 +110,5 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): result = result[8:] result = result.strip() - logger.info(result) - self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) self.callback.done(final_result=result, tokens=self.tokens) From 3d7185a874c38216afc8edc8bcd423d0941a8a1a Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 3 Feb 2025 10:15:59 +0100 Subject: [PATCH 11/15] remove heading --- app/pipeline/inconsistency_check_pipeline.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index 9c684cfc..14774ce9 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -1,4 +1,6 @@ import logging +import re + from typing import Dict, Optional from langchain_core.runnables import Runnable @@ -103,12 +105,17 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): result = summary_response.content.strip() - # remove ``` from start and end if exists + # Remove ``` from start and end if exists if result.startswith("```") and result.endswith("```"): result = result[3:-3] if result.startswith("markdown"): result = result[8:] result = result.strip() + + # Remove first heading + result = re.sub(r"^#\s.*?\n", "", result) + + logger.info(f"Consistency issues found: {result}") self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) self.callback.done(final_result=result, tokens=self.tokens) From efe343b6faf72a0b34a4fffc2e01c33dc605047c Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 3 Feb 2025 10:28:05 +0100 Subject: [PATCH 12/15] fix heading --- app/pipeline/inconsistency_check_pipeline.py | 7 +++---- app/pipeline/prompts/inconsistency_check_prompts.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index 14774ce9..42f4c011 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -102,7 +102,7 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): "consistency_issues": formatted_consistency_issues, } ) - + result = summary_response.content.strip() # Remove ``` from start and end if exists @@ -112,10 +112,9 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): result = result[8:] result = result.strip() - # Remove first heading + # Remove first heading or heading containing 'Summary of Consistency Issues' result = re.sub(r"^#\s.*?\n", "", result) - - logger.info(f"Consistency issues found: {result}") + result = re.sub(r"^#+.*?Summary of Consistency Issues\s*\n", "", result) self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK) self.callback.done(final_result=result, tokens=self.tokens) diff --git a/app/pipeline/prompts/inconsistency_check_prompts.py b/app/pipeline/prompts/inconsistency_check_prompts.py index 74e38162..406232e0 100644 --- a/app/pipeline/prompts/inconsistency_check_prompts.py +++ b/app/pipeline/prompts/inconsistency_check_prompts.py @@ -56,7 +56,7 @@ -Respond with a summary of the consistency issues found in the exercise. \ -Just provide the easily digestible formatted markdown without other explanations. +Respond with a summary of the consistency issues found in the exercise. Make it clear which file path contains the \ +issues. Just provide the easily digestible formatted markdown without other explanations. """ From 6d8693694e0a58782865343d7b72a2d78e65286c Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 3 Feb 2025 10:30:18 +0100 Subject: [PATCH 13/15] black --- app/pipeline/inconsistency_check_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index 42f4c011..0dfe36cf 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -102,7 +102,7 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): "consistency_issues": formatted_consistency_issues, } ) - + result = summary_response.content.strip() # Remove ``` from start and end if exists @@ -111,7 +111,7 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): if result.startswith("markdown"): result = result[8:] result = result.strip() - + # Remove first heading or heading containing 'Summary of Consistency Issues' result = re.sub(r"^#\s.*?\n", "", result) result = re.sub(r"^#+.*?Summary of Consistency Issues\s*\n", "", result) From d9f6c703637a51cb004c86bd0682822de0f6448b Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 3 Feb 2025 10:41:49 +0100 Subject: [PATCH 14/15] fix wording --- app/pipeline/inconsistency_check_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py index 0dfe36cf..3ac108fb 100644 --- a/app/pipeline/inconsistency_check_pipeline.py +++ b/app/pipeline/inconsistency_check_pipeline.py @@ -75,10 +75,10 @@ def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs): "file_path": file_path, "problem_statement": dto.exercise.problem_statement, "template_file": dto.exercise.template_repository.get( - file_path, "empty file" + file_path, "no file found" ), "solution_file": dto.exercise.solution_repository.get( - file_path, "empty file" + file_path, "no file found" ), } for file_path in file_paths From ff3c49e05f87b239f8c9c3569e80fd2566421921 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 3 Feb 2025 10:44:28 +0100 Subject: [PATCH 15/15] update prompt --- app/pipeline/prompts/inconsistency_check_prompts.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/pipeline/prompts/inconsistency_check_prompts.py b/app/pipeline/prompts/inconsistency_check_prompts.py index 406232e0..47c898dc 100644 --- a/app/pipeline/prompts/inconsistency_check_prompts.py +++ b/app/pipeline/prompts/inconsistency_check_prompts.py @@ -56,7 +56,8 @@ -Respond with a summary of the consistency issues found in the exercise. Make it clear which file path contains the \ -issues. Just provide the easily digestible formatted markdown without other explanations. +Respond with a summary of the consistency issues found in the exercise, stay specific and clear so the issues can be \ +easily fixed by the teaching assistants. Make it clear which file path contains the issues. Just provide the easily \ +digestible formatted markdown without other explanations. """