# System prompt for the error-identification assistant. `${task}` is a
# placeholder left in the text for the caller to fill in.
DEFAULT_SYSTEM_MESSAGE = (
    "You are an AI assistant to help user identify errors "
    "in a model generated output for the ${task} task."
)
# Generic system prompt; contains no placeholders.
CHATGPT_SYSTEM_MESSAGE = 'You are an AI assistant.'
# Two-part error-analysis prompt.
#   [0] shows the source, the model output, the generation instruction and the
#       reference output, then asks for major and minor errors.
#   [1] asks for every error to be restated as JSON (location, aspect,
#       explanation, severity, score reduction).
# Placeholders: ${input_context}, ${hypothesis_output},
# ${generation_instruction}, ${reference_output}, ${aspect_descriptions}.
# The JSON example in [1] keeps balanced braces and has no trailing comma so
# that a response copying its shape can be parsed as JSON.
EA_TEMPLATE = [
"""\
Source: ${input_context}
Output: ${hypothesis_output}
The output is generated by a model with the following instruction: ${generation_instruction}
The correct output for this source input is: ${reference_output}
Identify the major and minor errors in this model generated output. 
Note that Major errors refer to actual errors that affects the task severely, and Minor errors refer to small imperfections, and purely subjective opinions about the output. \
You can compare the output with the reference output to identify the errors, but should not explain by simply mentioning the difference between them. \
Instead, give reasonable explanations like without a reference. Also explain like an expert in the task domain, not like a human annotator. \
"""
,
"""For each error you give in the response, please also elaborate the following information in a specific json format:
- error location (the substring that is wrong in the hypothesis output)
- error aspect it belongs to. Choose from ${aspect_descriptions}
- concise explanation why it's an error, and the correction suggestions. 
- severity of the error ("Major" or "Minor"). 
- reduction of score (Between 0.5 and 5 given the severity of the error)
Output JSON format:
- if there is no error
```
{"errors": {}}
```
- if there are errors
```
{"errors": { "error_1": { "error_location": "...", "error_aspect": "...", "explanation": "...", "severity": "...", "score_reduction": ... }, ... }}
```
""",
]

# Old WMT MQM templates. Do not delete this; archive only.
# Two strings:
#   [0] shows source, reference and output, then asks for major/minor errors.
#   [1] asks for per-error details and describes the JSON output format.
# Placeholders: ${input_context}, ${reference_output}, ${hypothesis_output},
# ${task}, ${aspect_descriptions}.
# NOTE(review): the "if there are errors" JSON example in [1] has one more "{"
# than "}" and a trailing comma; left as-is because this template is archived.
OLD_EA_TEMPLATES = ["""\
Source: ${input_context}
Reference: ${reference_output}
Output: ${hypothesis_output}
Based on the given source and reference, identify the major and minor errors in this output for a(an) ${task} task. \
Note that Major errors refer to actual errors that affects the task severely, and Minor errors refer to smaller imperfections, and purely subjective opinions about the output.
"""
,
"""\
## For each error you give in the response, please also elaborate the following information:
- error location (the words that are wrong in the hypothesis output)
- error aspect it belongs to. Choose from ${aspect_descriptions}
- explanation why it's an error, and the correction suggestions. Do not mention the reference output in the explanation. Instead, explain from the perspective of the source and the hypothesis output.
- severity of the error ("Major" or "Minor"). 
- reduction of score (Major for 5, Minor for 1)
## Output JSON format:
- if there is no error
```
{"errors": {}}
```
- if there are errors
```
{"errors": { "error_1": { "error_location": "...", "error_aspect": "...", "explanation": "...", "severity": "...", "score_reduction": ..., }, ...}
```
"""                 
]

# This template's performance has been verified, so its wording should not be
# changed casually.
# Two strings:
#   [0] shows the translation instruction, source text, reference and the
#       model-generated translation, then asks for Major/Minor errors.
#   [1] asks for per-error details and describes the JSON output format.
# Placeholders: ${generation_instruction}, ${input_context},
# ${reference_output}, ${hypothesis_output}, ${aspect_descriptions}.
# NOTE(review): ${reference_output} sits on its own line with no label —
# presumably the caller supplies the label text (or an empty string); confirm.
WMT_MQM_TEMPLATES = ["""\
Translation Instruction: ${generation_instruction}
Source Text: ${input_context}
${reference_output}
Model-generated Translation: ${hypothesis_output}

Please identify and categorize the errors in the model-generated translation as either Major or Minor. Major errors significantly impact the task, while Minor errors are subjective and represent minor imperfections.

When identifying errors, do not solely rely on the reference translation for comparison. Provide explanations as an expert in the task domain, without explicitly mentioning the reference output.
""",
"""\
For each error identified above, please provide the following information in a specific JSON format:
- Error Location: The substring in the model-generated translation that contains the error.
- Error Aspect: Choose from ${aspect_descriptions}.
- Explanation: Describe why the identified issue is an error, and offer suggestions for correction. Explain as an expert in the task domain, without explicitly mentioning the reference output.
- Severity: Classify the error as "Major" or "Minor".
- Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

JSON Format for Output:
- If there are no errors:
```
{"errors": {}}
```
- If there are errors:
```
{"errors": {
"error_1": {
"error_location": "...",
"error_aspect": "...",
"explanation": "...",
"severity": "...",
"score_reduction": ...
},
...
}}
```
""",  # JSON keys per error: error_location, error_aspect, explanation, severity, score_reduction
]

# Three-part prompt that ends in per-aspect scoring instead of an error list.
#   [0] shows source, reference and output (no trailing newline).
#   [1] asks for major/minor errors and their locations for the ${task} task.
#   [2] asks for a 0.5-5 score and explanation per aspect, as JSON keyed by
#       aspect name ("%Aspect%" is literal text in the prompt, not a placeholder).
# NOTE(review): [2] says "score the summarization" regardless of ${task}.
STARS_TEMPLATE = ["""\
Source: ${input_context}
Reference: ${reference_output}
Output: ${hypothesis_output}\
""", # input_context, reference_output, hypothesis_output
"""\
Based on the given source and reference, identify the major and minor errors in this output for a(an) ${task} task. \
Note that Major errors refer to actual errors that affects the task severely, may change the meaning of the output, and Minor errors refer to smaller imperfections, and purely subjective opinions about the output. \
There may be multiple errors or no error in the output. Please give the error location.
Here are the aspects you need to check: 
${aspects_descriptions}
""", # task, aspects_descriptions
"""\
## Please check your response, score the summarization to aspects below, with 0.5 to 5.

Here are the descriptions of the aspects:
${aspects_list}

## Output JSON format:
{
    "%Aspect%": {
        "Explanation": "...",
        "Score": "..."
    },
    ...
}
""" # aspects_list
]

# Three-part multi-aspect error-analysis prompt.
#   [0] shows source, reference and output (no trailing newline).
#   [1] asks for major/minor errors; the task name is hard-coded as
#       "summarization" in the active variant (the commented-out variants
#       below use ${task} instead).
#   [2] asks for per-error details and describes the JSON output format.
# Placeholders in the active strings: ${input_context}, ${reference_output},
# ${hypothesis_output}, ${aspects_descriptions}, ${aspects_list}.
MULTI_ASPECTS_TEMPLATE = [
"""\
Source: ${input_context}
Reference: ${reference_output}
Output: ${hypothesis_output}\
""", # input_context, reference_output, hypothesis_output
# Disabled variant: parameterised by ${task}, "no error" sentence before the aspects.
# """\
# Based on the given source and reference, identify the major and minor errors in this output for the ${task} task. \
# Major errors significantly impact the task or change the meaning, while Minor errors are subjective and represent minor imperfections. \
# There may be multiple errors or **no** error in the output. Here are the aspects you need to focus on: 
# ${aspects_descriptions}
# """, # task, aspects_descriptions  
"""\
Based on the given Source and Reference, please identify the major and minor errors in this Output for the summarization task. \
Major errors significantly impact the task or change the meaning, while Minor errors are subjective and represent minor imperfections. \
Here are the aspects you need to focus on: 
${aspects_descriptions}
There may be multiple errors or no error in the output.\
""", # aspects_descriptions (the task name is hard-coded in this variant)
# Disabled variant: parameterised by ${task}, without the "no error" sentence.
# """\
# Based on the given source and reference, identify the major and minor errors in this output for the ${task} task. \
# Major errors significantly impact the task or change the meaning, while Minor errors are subjective and represent minor imperfections. \
# Here are the aspects you need to focus on: 
# ${aspects_descriptions}
# """, # task, aspects_descriptions  
# For long text, we need "There may be multiple errors or no error in the output."
# For short text, delete "There may be multiple errors or no error in the output."
"""For each error identified above, please provide the following information in a specific JSON format:
- Error Location: The substring in the hypothesis output that contains the error.
- Error Aspect: Choose only one from ${aspects_list}.
- Explanation: Describe why the identified issue is an error, and offer suggestions for correction. Explain as an expert in the task domain, without explicitly mentioning the reference output.
- Severity: Classify the error as "Major" or "Minor".
- Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

JSON Format for Output:
- If there are no errors:
```
{"errors": {}}
```
- If there are errors:
{"errors": {
"error_1": {
"error_location": "...",
"error_aspect": "...",
"explanation": "...",
"severity": "...",
"score_reduction": ...
},
...
}}\
""", # aspects_list
]

# Four-part one-shot prompt.
#   [0] states the task instruction and the input, ending with an "Output:" cue.
#   [1] shows source, reference and output; it starts with a newline and has
#       no trailing newline.
#   [2] asks for major/minor errors for the ${task} task.
#   [3] asks for per-error details and describes the JSON output format.
# Placeholders: ${task_instruction}, ${task}, ${input_context},
# ${reference_output}, ${hypothesis_output}, ${aspects_descriptions},
# ${aspects_list}.
# The first line of [0] deliberately ends with a real newline (no trailing
# backslash) so the substituted instruction is not glued onto "Here is ...".
ONE_SHOT_TEMPLATE = ["""\
Task instruction: ${task_instruction}
Here is a input for the ${task} task:
${input_context}

Output:
""",
"""
Source: ${input_context}
Reference: ${reference_output}
Output: ${hypothesis_output}\
""",
"""\
Based on the given source and reference, identify the major and minor errors in this output for the ${task} task. \
Major errors significantly impact the task or change the meaning, while Minor errors are subjective and represent minor imperfections. \
There may be multiple errors or no error in the output. Here are the aspects you need to focus on: 
${aspects_descriptions}
""", # task, aspects_descriptions
"""For each error identified above, please provide the following information in a specific JSON format:
- Error Location: The substring in the hypothesis output that contains the error.
- Error Aspect: Choose only one from ${aspects_list}.
- Explanation: Describe why the identified issue is an error, and offer suggestions for correction. Explain as an expert in the task domain, without explicitly mentioning the reference output.
- Severity: Classify the error as "Major" or "Minor".
- Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

JSON Format for Output:
- If there are no errors:
```
{"errors": {}}
```
- If there are errors:
{"errors": {
"error_1": {
"error_location": "...",
"error_aspect": "...",
"explanation": "...",
"severity": "...",
"score_reduction": ...
},
...
}}\
""", # aspects_list
]

# Earlier version of the three-part multi-aspect prompt.
#   [0] shows source, reference and output (no trailing newline).
#   [1] asks for major/minor errors; the task name is hard-coded as
#       "summarization".
#   [2] asks for per-error details, lists the aspects, and describes the JSON
#       output format.
# NOTE(review): in [2] the lines beginning with "# " under "## Output JSON
# format:" are INSIDE the string literal, so those "# " prefixes are sent as
# part of the prompt text; they are not Python comments.
OLD_MULTI_ASPECTS_TEMPLATE = [
"""\
Source: ${input_context}
Reference: ${reference_output}
Output: ${hypothesis_output}\
""", # input_context, reference_output, hypothesis_output
"""\
Based on the given source and reference, identify the major and minor errors in this output for the summarization task. \
Major errors significantly impact the task or change the meaning, while Minor errors are subjective and represent minor imperfections. \
Here are the aspects you need to focus on: 
${aspects_descriptions}
""", # aspects_descriptions (the task name is hard-coded)
# For long text, we need "There may be multiple errors or no error in the output."
# For short text, delete "There may be multiple errors or no error in the output."
"""For each error you give in the response, please also elaborate the following information:
- error location (the words that are wrong in the hypothesis output)
- error aspect it belongs to. Choose from aspects below.
- explanation why it's an error, and the correction suggestions. Do not mention the reference output in the explanation. Instead, explain from the perspective of the source and the hypothesis output.
- severity of the error (\"Major\" or \"Minor\").
- reduction of score (Between 0.5 and 5)

Here are the aspects you need to focus on:
${aspects_list}

## Output JSON format: 
# - if there is no error
# ```
# {\"errors\": {}}
# ```
# - if there are errors
# ```
# {\"errors\": { \"error_1\": { \"error_location\": \"...\", \"error_aspect\": \"...\", \"explanation\": \"...\", \"severity\": \"...\", \"score_reduction\": ..., }, ...}```\
""", # aspects_list
]

# SUMMARIZATION_TEMPLATE = [
# """\
# Summarization Instruction:${generation_instruction}
# Source Text: ${input_context}
# ${reference_output}
# Model-generated Summary Output: ${hypothesis_output}\
# """, # input_context, reference_output, hypothesis_output
# """\
# Based on the given source and reference, identify the major and minor errors in this output for the ${task} task. \
# Major errors significantly impact the task or change the meaning, while Minor errors are subjective and represent minor imperfections. \
# There may be multiple errors or no error in the output. Here are the aspects you need to focus on: 
# ${aspects_descriptions}
# """, # task, aspects_descriptions  
# # """\
# # Based on the given source and reference, identify the major and minor errors in this output for the ${task} task. \
# # Major errors significantly impact the task or change the meaning, while Minor errors are subjective and represent minor imperfections. \
# # Here are the aspects you need to focus on: 
# # ${aspects_descriptions}
# # """, # task, aspects_descriptions  
# # For long text, we need" There may be multiple errors or no error in the output."
# # For short text, delete "There may be multiple errors or no error in the output."
# """For each error identified above, please provide the following information in a specific JSON format:
# - Error Location: The substring in the hypothesis output that contains the error.
# - Error Aspect: Choose only one from ${aspects_list}.
# - Explanation: Describe why the identified issue is an error, and offer suggestions for correction. Explain as an expert in the task domain, without explicitly mentioning the reference output.
# - Severity: Classify the error as "Major" or "Minor".
# - Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

# JSON Format for Output:
# - If there are no errors:
# ```
# {"errors": {}}
# ```
# - If there are errors:
# {"errors": {
# "error_1": {
# "error_location": "...",
# "error_aspect": "...",
# "explanation": "...",
# "severity": "...",
# "score_reduction": ...
# },
# ...
# }}\
# """, # aspect_descriptions
# ]

# Three-part prompt that asks for aspect scores first and errors second.
#   [0] shows source, reference and output (no trailing newline).
#   [1] asks for a 0.5-5 score per aspect, then for major/minor errors for the
#       ${task} task (no trailing newline).
#   [2] asks for per-error details and describes the JSON output format.
# Placeholders: ${input_context}, ${reference_output}, ${hypothesis_output},
# ${task}, ${aspects_descriptions}, ${aspects_list}.
# NOTE(review): [1] refers to "summary"/"summarization" even though it also
# takes ${task}.
ALIGN_SCORE_TEMPLATE = [
"""\
Source: ${input_context}
Reference: ${reference_output}
Output: ${hypothesis_output}\
""", # input_context, reference_output, hypothesis_output
"""\
Based on the given Source and Reference, please evaluate the quality of summary(Output) written for the input text. \
Please score the summarization with 0.5 to 5 for aspects below. \
Then, identify the major and minor errors in this output for the ${task} task. \
There may be multiple errors or no error in the output. Here are the aspects you need to focus on: 
${aspects_descriptions}\
""", # Disabled sentence: Note that Major errors refer to actual errors that affects the task severely, may change the meaning of the output, and Minor errors refer to smaller imperfections, and purely subjective opinions about the output. \
"""For each error identified in your response, please provide the following information in a specific JSON format:
- Error Location: The substring in the Output that contains the error.
- Error Aspect: Choose only one from ${aspects_list}.
- Explanation: Describe why the identified issue is an error, and offer suggestions for correction. Explain as an expert in the task domain, without explicitly mentioning the reference output.
- Severity: Classify the error as "Major" or "Minor".
- Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

JSON Format for Output:
- If there are no errors:
```
{"errors": {}}
```
- If there are errors:
{"errors": {
"error_1": {
"error_location": "...",
"error_aspect": "...",
"explanation": "...",
"severity": "...",
"score_reduction": ...
},
...
}}\
""", # aspects_list
]

# Three-part data-to-text error-analysis prompt. Only three string literals are
# live; everything else inside the list is commented-out earlier variants.
#   [0] shows the task instruction, source, reference and output.
#   [1] asks for major/minor errors for the data to text task.
#   [2] asks for per-error details and describes the JSON output format.
# Placeholders in the live strings: ${generation_instruction},
# ${input_context}, ${reference_output}, ${hypothesis_output}, ${aspects_list}.
# NOTE(review): ${reference_output} sits on its own line with no label —
# presumably the caller supplies the label text; confirm.
D2T_TEMPLATE = [
"""\
Task instruction:${generation_instruction}
Source: ${input_context}
${reference_output}
Output: ${hypothesis_output}\
""",
# --- Disabled variant A: aspect-driven error request + single-line JSON example ---
# """\
# Based on the given source and reference, identify the major and minor errors in this output for the data to text task.  \
# Note that Major errors refer to actual errors that affects the task severely, and Minor errors refer to smaller imperfections, and purely subjective opinions about the output. \
# Here are the aspects you need to focus on:
# ${aspects_descriptions}\
# """,
# """
# For each error you give in the response, please also elaborate the following information:
# - error location (the words that are wrong in the hypothesis output)
# - the main error aspect it belongs to. Choose **only one** from aspects below.
# - concise explanation why it's an error, and the correction suggestions. Do not mention the reference output in the explanation. Instead, explain from the perspective of the source and the hypothesis output.
# - severity of the error ("Major" or "Minor"). 
# - reduction of score (Between 0.5 and 5)

# Here are the aspects you need to focus on:
# ${aspects_descriptions}\

# ## Output JSON format:
# - if there is no error
# ```
# {"errors": {}}
# ```
# - if there are errors
# ```
# {"errors": { "error_1": { "error_location": "...", "error_aspect": "...", "explanation": "...", "severity": "...", "score_reduction": ..., }, ...}
# ```\
# """,
# --- Disabled variant B: score-then-errors request ---
# """\
# Source: ${input_context}
# ${reference_output}
# Output: ${hypothesis_output}\
# """, # input_context, reference_output, hypothesis_output
# """\
# Based on the given Source and Reference, please evaluate the quality of Output written for the input text, which is to ${generation_instruction}. \
# Please score the Output with 0.5 to 5 for aspects below. \
# Then, identify the major and minor errors in this output for the ${task} task. \
# There may be multiple errors or no error in the output. Here are the aspects you need to focus on: 
# ${aspects_descriptions}\
# """,
# """For each error identified in your response, please provide the following information in a specific JSON format:
# - Error Location: The substring in the Output that contains the error.
# - Error Aspect: Choose only one from ${aspects_list}.
# - Explanation: Describe why the identified issue is an error, and offer suggestions for correction. Explain as an expert in the task domain, without explicitly mentioning the reference output.
# - Severity: Classify the error as "Major" or "Minor".
# - Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error and the score given.

# JSON Format for Output:
# - If there are no errors:
# ```
# {"errors": {}}
# ```
# - If there are errors:
# {"errors": {
# "error_1": {
# "error_location": "...",
# "error_aspect": "...",
# "explanation": "...",
# "severity": "...",
# "score_reduction": ...
# },
# ...
# }}\
# """,

"""\
Based on the given Source and reference, identify the major and minor errors in this Output for the data to text task, which is to ${generation_instruction}. \
Note that Major errors refer to actual errors that affects the task severely, may change the meaning of the output, and Minor errors refer to smaller imperfections, and purely subjective opinions about the output. \
There may be multiple errors or no error in the output.\
""", # generation_instruction. Disabled sentence: You should check about ${aspects_descriptions}. \
# --- Disabled variant C: same as the live [2] but with Explanation listed first ---
# """For each error identified in your response, please provide the following information in a specific JSON format:
# - Explanation: Describe why the identified issue is an error, and offer suggestions for correction. Explain as an expert in the task domain, without explicitly mentioning the reference output.
# - Error Location: The substring in the Output that contains the error.
# - Error Aspect: Choose only one from ${aspects_list}.
# - Severity: Classify the error as "Major" or "Minor".
# - Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

# JSON Format for Output:
# - If there are no errors:
# ```
# {"errors": {}}
# ```
# - If there are errors:
# {"errors": {
# "error_1": {
# "explanation": "...",
# "error_location": "...",
# "error_aspect": "...",
# "severity": "...",
# "score_reduction": ...
# },
# ...
# }}\
# """,
"""For each error identified in your response, please provide the following information in a specific JSON format:
- Error Location: The substring in the Output that contains the error.
- Error Aspect: Choose only one from ${aspects_list}.
- Explanation: Describe why the identified issue is an error, and offer suggestions for correction. Explain as an expert in the task domain, without explicitly mentioning the reference output.
- Severity: Classify the error as "Major" or "Minor".
- Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

JSON Format for Output:
- If there are no errors:
```
{"errors": {}}
```
- If there are errors:
{"errors": {
"error_1": {
"error_location": "...",
"error_aspect": "...",
"explanation": "...",
"severity": "...",
"score_reduction": ...
},
...
}}\
""", # aspects_list
]

# Single prompt that asks for a deliberately incorrect output plus a JSON
# description of the injected errors.
# Placeholders: ${generation_instruction}, ${input_context},
# ${reference_output}, ${error_requirements}.
# The requested JSON object has two top-level keys: "incorrect_output" and
# "errors".
KB_TEMPLATES = [
"""${generation_instruction}
Source Text: ${input_context}
Correct Output: ${reference_output}

Generate an incorrect output for the given source text. The incorrect output should satisfy the following requirements:
${error_requirements}
Note that major errors refer to actual errors that affects the task severely, and minor errors refer to smaller imperfections, and purely subjective opinions about the output. \
For each error, give me the error location, error aspect, explanation, severity ("major" or "minor"), and score reduction (between 0.5 to 5 given the severity of the error).
The output should be in the following format:
```
{
"incorrect_output": <your incorrect output>,
"errors": {
"error_1": {
"error_location": "...",
"error_aspect": "...",
"explanation": "...",
"severity": "...",
"score_reduction": ...
},
...
}
}
```
"""
]
# Plain-text counterpart of KB_TEMPLATES: asks for an incorrect output derived
# from the correct one, with the errors listed as labelled text fields
# ("Error location 1:", "Error aspect 1:", ...) rather than JSON.
# Placeholders: ${generation_instruction}, ${input_context},
# ${reference_output}, ${error_requirements}.
# Score reduction here is requested as an integer between 1 and 5, unlike the
# 0.5-5 range used in KB_TEMPLATES.
KB_TXT_TEMPLATES = [
"""You are generating an output given the following instruction and context:
${generation_instruction}
${input_context}

The correct output is: ${reference_output}

Please generate an incorrect output for the given instruction and context by modifying the correct output. The incorrect output should contain the following errors:
${error_requirements}
For each error, give me the 
- error location (the substring that is wrong in the generated incorrect output)
- error aspect
- explanation (the generic error type description, why it's an error, and the correction suggestions)
- severity ("major" or "minor")
- score reduction (an integer between 1 to 5 given the severity of the error)

Output format:
Generated incorrect output: 

Error location 1:
Error aspect 1:
Explanation 1:
Severity 1:
Score reduction 1:
...
"""
]

# Single generation prompt: a leading newline, the instruction, a blank line,
# the source text, and a final "Output:" cue with nothing after it.
# Placeholders: ${generation_instruction}, ${input_context}.
INSTRUCTION_TEMPLATE = [
    "\n"
    "${generation_instruction}\n"
    "\n"
    "Source Text: ${input_context}\n"
    "Output:"
]

# Three-part long-form QA error-analysis prompt.
#   [0] shows source, reference and output (no trailing newline).
#   [1] asks for major/minor errors for the ${task} task and names the aspects
#       to check.
#   [2] asks for per-error details and describes the JSON output format.
# Placeholders: ${input_context}, ${reference_output}, ${hypothesis_output},
# ${task}, ${generation_instruction}, ${aspects_descriptions}, ${aspects_list}.
# NOTE(review): ${reference_output} sits on its own line with no label —
# presumably the caller supplies the label text; confirm.
LONGFORM_QA_TEMPLATE = [
"""\
Source: ${input_context}
${reference_output}
Output: ${hypothesis_output}\
""",
# A space follows "${aspects_descriptions}." so the substituted aspect text
# does not run straight into the next sentence.
"""\
Based on the given Source and reference, identify the major and minor errors in this Output for the ${task} task, which is to ${generation_instruction}. \
Note that Major errors refer to actual errors that affects the task severely, may change the meaning of the output, and Minor errors refer to smaller imperfections, and purely subjective opinions about the output. \
You should check about ${aspects_descriptions}. There may be multiple errors or no error in the output.\
""", # task, generation_instruction, aspects_descriptions
# --- Disabled variant: same as the live [2] but with Explanation listed first ---
# """For each error identified in your response, please provide the following information in a specific JSON format:
# - Explanation: Describe why the identified issue is an error, and offer suggestions for correction. Explain as an expert in the task domain, without explicitly mentioning the reference output.
# - Error Location: The substring in the Output that contains the error.
# - Error Aspect: Choose only one from ${aspects_list}.
# - Severity: Classify the error as "Major" or "Minor".
# - Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

# JSON Format for Output:
# - If there are no errors:
# ```
# {"errors": {}}
# ```
# - If there are errors:
# {"errors": {
# "error_1": {
# "explanation": "...",
# "error_location": "...",
# "error_aspect": "...",
# "severity": "...",
# "score_reduction": ...
# },
# ...
# }}\
# """
"""For each error identified in your response, please provide the following information in a specific JSON format:
- Error Location: The substring in the Output that contains the error.
- Error Aspect: Choose only one from ${aspects_list}.
- Explanation: Describe why the identified issue is an error, and offer suggestions for correction. Explain as an expert in the task domain, without explicitly mentioning the reference output.
- Severity: Classify the error as "Major" or "Minor".
- Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

JSON Format for Output:
- If there are no errors:
```
{"errors": {}}
```
- If there are errors:
{"errors": {
"error_1": {
"error_location": "...",
"error_aspect": "...",
"explanation": "...",
"severity": "...",
"score_reduction": ...
},
...
}}\
"""]

# Three-part instruction-following error-analysis prompt.
#   [0] shows the combined instruction-and-source text, the reference and the
#       output (no trailing newline).
#   [1] asks for major/minor errors for the ${task} task and names the aspects
#       to check.
#   [2] asks for per-error details and describes the JSON output format.
# Placeholders: ${generation_instruction_and_source}, ${reference_output},
# ${hypothesis_output}, ${task}, ${aspects_descriptions}, ${aspects_list}.
# NOTE(review): ${reference_output} sits on its own line with no label —
# presumably the caller supplies the label text; confirm.
INSTRUCTION_FOLLOWING_TEMPLATE = [
"""\
${generation_instruction_and_source}
${reference_output}
Output: ${hypothesis_output}\
""",
# A space follows "${aspects_descriptions}." so the substituted aspect text
# does not run straight into the next sentence.
"""\
Based on the given Source and reference, identify the major and minor errors in this Output for the ${task} task. \
Note that Major errors refer to actual errors that affects the task severely, may change the meaning of the output, and Minor errors refer to smaller imperfections, and purely subjective opinions about the output. \
You should check about ${aspects_descriptions}. There may be multiple errors or no error in the output.\
""", # task, aspects_descriptions
"""For each error identified in your response, please provide the following information in a specific JSON format:
- Error Location: The substring in the Output that contains the error.
- Error Aspect: Choose only one from ${aspects_list}.
- Explanation: Describe why the identified issue is an error, and offer suggestions for correction.
- Severity: Classify the error as "Major" or "Minor".
- Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

JSON Format for Output:
- If there are no errors:
```
{"errors": {}}
```
- If there are errors:
{"errors": {
"error_1": {
"error_location": "...",
"error_aspect": "...",
"explanation": "...",
"severity": "...",
"score_reduction": ...
},
...
}}\
"""]

# Single prompt asking for an improved paraphrase of the reference output.
# Placeholders: ${generation_instruction}, ${input_context},
# ${reference_output}. The text ends with a newline.
PARAPHRASE_TEMPLATES = [
    (
        "You are generating an output given the following instruction and context:\n"
        "${generation_instruction}\n"
        "${input_context}\n"
        "\n"
        "An reference output is: \n"
        "${reference_output}\n"
        "\n"
        "However, there might still be errors in the reference output. "
        "Please paraphrase the reference output to generate a new output that is better than the reference output. "
        "The new output should be grammatically correct and semantically consistent with the input. "
        "The new output should also be more fluent and natural than the reference output.\n"
    ),
]


# Two-part math QA error-analysis prompt.
#   [0] shows the instruction, the problem, the correct solution and the
#       model-generated solution, then asks for all errors under the listed
#       aspects.
#   [1] asks for per-error details and describes the JSON output format; it
#       starts with a newline.
# Placeholders: ${generation_instruction}, ${input_context},
# ${reference_output}, ${hypothesis_output}, ${aspects_list}.
MATHQA_TEMPLATES = [
"""\
${generation_instruction}
${input_context}

The correct solution is:
${reference_output}

A model-generated solution is:
${hypothesis_output}

Please identify all the errors in this output considering the following aspects:
${aspects_list}
""",
"""
For each error identified in your response, please elaborate the following information:
- Error Location: The substring in the Output that contains the error.
- Error Aspect: Choose only one from ${aspects_list}
- Explanation: Describe why it is an error , and offer suggestions for correction.
- Severity: Classify the error as "Major" or "Minor".
- Score Reduction: Assign a reduction score between 0.5 and 5, considering the severity of the error.

JSON Format for Output:
- If there are no errors:
```
{"errors": {}}
```
- If there are errors:
{"errors": {
"error_1": {
"error_location": "...",
"error_aspect": "...",
"explanation": "...",
"severity": "...",
"score_reduction": ...
},
...
}}
"""
]

# Single zero-shot baseline prompt: shows the instruction, the input and the
# model-generated output, then asks for a numeric rating.
# Placeholders: ${generation_instruction}, ${input_context},
# ${hypothesis_output}.
# The rating sentence ends with a real newline so that the "Rating:" cue sits
# on its own line; the text ends right after "Rating:" with no newline.
# NOTE(review): the scale here is 0.5-10 while the other templates in this
# file use 0.5-5 — confirm this is intended.
ZERO_SHOT_BASELINE_TEMPLATE = [
"""\
${generation_instruction}
${input_context}

Model-generated Output:
${hypothesis_output}


You should rate Model-generated Output on a scale from 0.5 (worst) to 10 (best).
Rating:\
"""]