Skip to content
This repository was archived by the owner on May 1, 2025. It is now read-only.

Add a test example that uses pytest-json-report: #13

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions tests/output.report.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
{
"collectors": [
{
"nodeid": "",
"outcome": "passed",
"result": [
{
"nodeid": "tests/test_use_json_report.py",
"type": "Module"
}
]
},
{
"nodeid": "tests/test_use_json_report.py",
"outcome": "passed",
"result": [
{
"lineno": 42,
"nodeid": "tests/test_use_json_report.py::test_summarize_to_30_words[0]",
"type": "Function"
},
{
"lineno": 42,
"nodeid": "tests/test_use_json_report.py::test_summarize_to_30_words[1]",
"type": "Function"
},
{
"lineno": 42,
"nodeid": "tests/test_use_json_report.py::test_summarize_to_30_words[2]",
"type": "Function"
},
{
"lineno": 69,
"nodeid": "tests/test_use_json_report.py::test_pass_rate_of_30_words",
"type": "Function"
}
]
}
],
"created": 1723531836.549531,
"duration": 17.205761909484863,
"environment": {},
"exitcode": 1,
"root": "/Users/wenzhe/dev/log10_eval_example",
"summary": {
"collected": 4,
"failed": 1,
"passed": 3,
"total": 4
},
"tests": [
{
"call": {
"crash": {
"lineno": 0,
"message": "\u001b[0mNumber of words is 35, expected <= 30\nassert False\n\u001b[31mtest_use_json_report.py\u001b[0m:65 in test_summarize_to_30_words() -> with check:\n\u001b[31mtest_use_json_report.py\u001b[0m:66 in test_summarize_to_30_words -> assert num_words_less_than_30, f\"Number of words is {num_words}, expected <= 30\"\n\u001b[31mAssertionError: Number of words is 35, expected <= 30\nassert False\n\u001b[0m\n\n------------------------------------------------------------\nFailed Checks: 1",
"path": "tests/test_use_json_report.py::test_summarize_to_30_words[0]"
},
"duration": 6.695120791089721,
"longrepr": "\u001b[31mFAILURE: \u001b[0mNumber of words is 35, expected <= 30\nassert False\n\u001b[31mtest_use_json_report.py\u001b[0m:65 in test_summarize_to_30_words() -> with check:\n\u001b[31mtest_use_json_report.py\u001b[0m:66 in test_summarize_to_30_words -> assert num_words_less_than_30, f\"Number of words is {num_words}, expected <= 30\"\n\u001b[31mAssertionError: Number of words is 35, expected <= 30\nassert False\n\u001b[0m\n\n------------------------------------------------------------\nFailed Checks: 1",
"outcome": "failed"
},
"keywords": [
"test_summarize_to_30_words[0]",
"parametrize",
"pytestmark",
"0",
"test_use_json_report.py",
"tests",
"log10_eval_example",
""
],
"lineno": 42,
"metadata": {
"log10": {
"last_completion_id": "https://log10.io/app/test-0/completions/45515561-5fc3-4709-8361-8eadf349e337"
}
},
"nodeid": "tests/test_use_json_report.py::test_summarize_to_30_words[0]",
"outcome": "failed",
"setup": {
"duration": 0.0008069159230217338,
"outcome": "passed"
},
"teardown": {
"duration": 0.0001188330352306366,
"outcome": "passed"
}
},
{
"call": {
"duration": 5.655736291082576,
"outcome": "passed"
},
"keywords": [
"test_summarize_to_30_words[1]",
"parametrize",
"pytestmark",
"1",
"test_use_json_report.py",
"tests",
"log10_eval_example",
""
],
"lineno": 42,
"metadata": {
"log10": {
"last_completion_id": "https://log10.io/app/test-0/completions/29dded7a-2998-405c-a474-684ec0ba1682"
}
},
"nodeid": "tests/test_use_json_report.py::test_summarize_to_30_words[1]",
"outcome": "passed",
"setup": {
"duration": 0.0003689579898491502,
"outcome": "passed"
},
"teardown": {
"duration": 0.0006292080506682396,
"outcome": "passed"
}
},
{
"call": {
"duration": 4.296570708043873,
"outcome": "passed"
},
"keywords": [
"test_summarize_to_30_words[2]",
"parametrize",
"pytestmark",
"2",
"test_use_json_report.py",
"tests",
"log10_eval_example",
""
],
"lineno": 42,
"metadata": {
"log10": {
"last_completion_id": "https://log10.io/app/test-0/completions/9d736109-d982-45e5-a013-db7415f66f2e"
}
},
"nodeid": "tests/test_use_json_report.py::test_summarize_to_30_words[2]",
"outcome": "passed",
"setup": {
"duration": 0.002584167057648301,
"outcome": "passed"
},
"teardown": {
"duration": 0.0004907089751213789,
"outcome": "passed"
}
},
{
"call": {
"duration": 0.008413875009864569,
"outcome": "passed"
},
"keywords": [
"test_pass_rate_of_30_words",
"test_use_json_report.py",
"tests",
"log10_eval_example",
""
],
"lineno": 69,
"nodeid": "tests/test_use_json_report.py::test_pass_rate_of_30_words",
"outcome": "passed",
"setup": {
"duration": 0.006148124928586185,
"outcome": "passed"
},
"teardown": {
"duration": 0.00025579100474715233,
"outcome": "passed"
}
}
]
}
74 changes: 74 additions & 0 deletions tests/test_use_json_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import pytest
from pytest_check import check
import sys
import os
import pandas as pd

# Append the src directory to sys.path to make its modules available for import
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))

import jsonlines
from log10.load import log10_session

from my_llm import (
summarize_to_30_words,
)
from my_eval_metrics import cosine_distance, count_words
from report_utils import (
filter_results_by_test_name,
report_pass_rate,
)


@pytest.fixture
def session():
    """Provide a log10 session that stays open while the test runs.

    The session comes from ``log10_session()``. Before it is handed to the
    test it is checked to report no completion ID yet.
    """
    with log10_session() as active_session:
        assert active_session.last_completion_id() is None, "No completion ID should be found."
        yield active_session


@pytest.fixture
def data():
    """Load the evaluation samples from ``data.jsonl``.

    Returns:
        A list of ``(article, summary)`` tuples, one per record, in file
        order. The path is relative, so it is resolved against the current
        working directory.
    """
    with jsonlines.open("data.jsonl") as reader:
        return [(record["article"], record["summary"]) for record in reader]


# @pytest.mark.repeat(2)
@pytest.mark.parametrize("sample_idx", range(3))
def test_summarize_to_30_words(data: list, sample_idx: int, results_bag, json_metadata, session):
    """Summarize one sample and check its length and closeness to the reference.

    Args:
        data: List of ``(article, expected_summary)`` tuples.
        sample_idx: Index into ``data`` of the sample under test.
        results_bag: Fixture object on which the per-sample values are stored
            as attributes.
        json_metadata: Mapping that receives the ``"log10"`` entry for this
            test.
        session: log10 session used to look up the last completion URL.

    The test fails when the summary has more than 30 words or when the cosine
    distance to the expected summary is not below 0.2. Both conditions are
    evaluated as soft checks, so one failure does not hide the other.
    """
    article, expected_summary = data[sample_idx]
    output = summarize_to_30_words(article)
    cos_distance = cosine_distance(expected_summary, output)
    num_words = count_words(output)
    # Looked up once and reused for both the results bag and the JSON metadata.
    completion_url = session.last_completion_url()

    results_bag.test_name = f"test_summarize_to_30_words_{sample_idx}"
    results_bag.article = article
    results_bag.expected_summary = expected_summary
    results_bag.output = output
    # NOTE: despite the name, "cos_sim" holds the cosine *distance*; the
    # attribute name is kept so existing result columns do not change.
    results_bag.cos_sim = cos_distance
    results_bag.num_words = num_words
    results_bag.log10_completion_url = completion_url
    # NOTE: the "last_completion_id" key carries the completion URL; the key
    # is kept as-is for compatibility with already generated reports.
    json_metadata["log10"] = {"last_completion_id": completion_url}

    num_words_less_than_30 = num_words <= 30
    results_bag.num_words_less_than_30 = num_words_less_than_30

    cos_distance_less_than_02 = cos_distance < 0.2
    results_bag.cos_distance_less_than_02 = cos_distance_less_than_02

    # Soft assertions: each failure is recorded and the test keeps going, so a
    # word-count failure no longer masks the cosine-distance check.
    with check:
        assert num_words_less_than_30, f"Number of words is {num_words}, expected <= 30"
    with check:
        assert cos_distance_less_than_02, f"Cosine distance is {cos_distance}, expected < 0.2"


def test_pass_rate_of_30_words(module_results_df: pd.DataFrame):
    """Check the aggregate pass rate of the ``test_summarize_to_30_words`` runs.

    Args:
        module_results_df: DataFrame of the results collected for this module.

    The full results are first written to ``module_results_df_080724.csv``.
    The rows for ``test_summarize_to_30_words`` are then selected and the test
    fails unless the reported pass rate is greater than 0.66.
    """
    # Save module_results_df to CSV (relative path, so it lands in the
    # current working directory).
    module_results_df.to_csv("module_results_df_080724.csv", index=False)
    df = filter_results_by_test_name(module_results_df, "test_summarize_to_30_words")

    pass_rate, pass_rate_report_str = report_pass_rate(df)

    # Include the rate and the report text in the failure message so that a
    # failing run explains itself.
    assert pass_rate > 0.66, f"Pass rate {pass_rate} is not above 0.66\n{pass_rate_report_str}"