Skip to content

Added Flores #717

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/lighteval/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,14 @@ class Metrics(Enum):
corpus_level_fn=CorpusLevelTranslationMetric("chrf").compute,
higher_is_better=True,
)
# chrF++ translation metric, aggregated at corpus level. The "chrf++" metric
# type passed below is mapped to `sacrebleu.CHRF(word_order=2)` in
# metrics_corpus.py, whereas plain "chrf" uses `sacrebleu.CHRF()` defaults.
chrf_plus = CorpusLevelMetric(
    metric_name="chrf++",
    sample_level_fn=GenerativePreparator().prepare,
    category=MetricCategory.GENERATIVE,
    use_case=MetricUseCase.TRANSLATION,
    corpus_level_fn=CorpusLevelTranslationMetric("chrf++").compute,
    higher_is_better=True,
)
copyright = SampleLevelMetricGrouping(
metric_name=["longest_common_prefix_length", "edit_distance", "edit_similarity"],
sample_level_fn=StringDistance(
Expand Down
2 changes: 2 additions & 0 deletions src/lighteval/metrics/metrics_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ def get_metric(self):
return sacrebleu.BLEU(trg_lang=self.lang)
elif self.metric_type == "chrf":
return sacrebleu.CHRF()
elif self.metric_type == "chrf++":
return sacrebleu.CHRF(word_order=2)
elif self.metric_type == "ter":
return sacrebleu.TER(asian_support=True if self.lang != "" else False)
else:
Expand Down
252 changes: 251 additions & 1 deletion src/lighteval/tasks/multilingual/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# SOFTWARE.

from functools import partial
from itertools import combinations

from langcodes import Language as LangCodeLanguage
from langcodes import standardize_tag
Expand All @@ -30,6 +31,7 @@
multilingual_quasi_exact_match_metric,
multilingual_quasi_f1_score_metric,
)
from lighteval.metrics.metrics import Metrics
from lighteval.metrics.normalizations import LogProbCharNorm, LogProbPMINorm, LogProbTokenNorm
from lighteval.tasks.default_prompts import LETTER_INDICES
from lighteval.tasks.lighteval_task import LightevalTaskConfig
Expand All @@ -52,13 +54,14 @@
from lighteval.tasks.templates.multichoice import get_mcq_prompt_function
from lighteval.tasks.templates.nli import get_nli_prompt_function
from lighteval.tasks.templates.qa import get_qa_prompt_function
from lighteval.tasks.templates.translation import get_translation_prompt_function
from lighteval.tasks.templates.utils.formulation import (
CFFormulation,
HybridFormulation,
MCFFormulation,
)
from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS
from lighteval.utils.language import Language, iso_639_3_ind_to_iso_639_3_macro
from lighteval.utils.language import Language, iso_639_3_ind_to_iso_639_3_macro, manage_duplicate_language_codes


TASKS_TABLE = []
Expand Down Expand Up @@ -3903,3 +3906,250 @@
*hindi_boolq_tasks,
]
)

# ------------------------------- Translation Tasks ------------------------------- #
# FLORES-200 language codes used to build the `flores200:*` tasks. Each code
# is used verbatim in the task name, the HF subset name and the
# `sentence_<code>` column names; only the part before the underscore is
# mapped to a `Language` for the prompt.
#
# Commented-out entries are not registered. Every one of them is a second
# script variant of a language that is already listed in another script.
# NOTE(review): `taq` is still listed in both Latn and Tfng, unlike the other
# multi-script languages -- confirm whether that is intentional.
flores_200_languages = [
    # "ace_Arab",
    "ace_Latn",
    "acm_Arab",
    "acq_Arab",
    "aeb_Arab",
    "afr_Latn",
    "ajp_Arab",
    "aka_Latn",
    "amh_Ethi",
    "apc_Arab",
    "arb_Arab",
    # "arb_Latn",
    "ars_Arab",
    "ary_Arab",
    "arz_Arab",
    "asm_Beng",
    "ast_Latn",
    "awa_Deva",
    "ayr_Latn",
    "azb_Arab",
    "azj_Latn",
    "bak_Cyrl",
    "bam_Latn",
    "ban_Latn",
    "bel_Cyrl",
    "bem_Latn",
    "ben_Beng",
    "bho_Deva",
    # "bjn_Arab",
    "bjn_Latn",
    "bod_Tibt",
    "bos_Latn",
    "bug_Latn",
    "bul_Cyrl",
    "cat_Latn",
    "ceb_Latn",
    "ces_Latn",
    "cjk_Latn",
    "ckb_Arab",
    "crh_Latn",
    "cym_Latn",
    "dan_Latn",
    "deu_Latn",
    "dik_Latn",
    "dyu_Latn",
    "dzo_Tibt",
    "ell_Grek",
    "eng_Latn",
    "epo_Latn",
    "est_Latn",
    "eus_Latn",
    "ewe_Latn",
    "fao_Latn",
    "fij_Latn",
    "fin_Latn",
    "fon_Latn",
    "fra_Latn",
    "fur_Latn",
    "fuv_Latn",
    "gla_Latn",
    "gle_Latn",
    "glg_Latn",
    "grn_Latn",
    "guj_Gujr",
    "hat_Latn",
    "hau_Latn",
    "heb_Hebr",
    "hin_Deva",
    "hne_Deva",
    "hrv_Latn",
    "hun_Latn",
    "hye_Armn",
    "ibo_Latn",
    "ilo_Latn",
    "ind_Latn",
    "isl_Latn",
    "ita_Latn",
    "jav_Latn",
    "jpn_Jpan",
    "kab_Latn",
    "kac_Latn",
    "kam_Latn",
    "kan_Knda",
    # "kas_Arab",
    "kas_Deva",
    "kat_Geor",
    # "knc_Arab",
    "knc_Latn",
    "kaz_Cyrl",
    "kbp_Latn",
    "kea_Latn",
    "khm_Khmr",
    "kik_Latn",
    "kin_Latn",
    "kir_Cyrl",
    "kmb_Latn",
    "kmr_Latn",
    "kon_Latn",
    "kor_Hang",
    "lao_Laoo",
    "lij_Latn",
    "lim_Latn",
    "lin_Latn",
    "lit_Latn",
    "lmo_Latn",
    "ltg_Latn",
    "ltz_Latn",
    "lua_Latn",
    "lug_Latn",
    "luo_Latn",
    "lus_Latn",
    "lvs_Latn",
    "mag_Deva",
    "mai_Deva",
    "mal_Mlym",
    "mar_Deva",
    # "min_Arab",
    "min_Latn",
    "mkd_Cyrl",
    "plt_Latn",
    "mlt_Latn",
    "mni_Beng",
    "khk_Cyrl",
    "mos_Latn",
    "mri_Latn",
    "mya_Mymr",
    "nld_Latn",
    "nno_Latn",
    "nob_Latn",
    "npi_Deva",
    "nso_Latn",
    "nus_Latn",
    "nya_Latn",
    "oci_Latn",
    "gaz_Latn",
    "ory_Orya",
    "pag_Latn",
    "pan_Guru",
    "pap_Latn",
    "pes_Arab",
    "pol_Latn",
    "por_Latn",
    "prs_Arab",
    "pbt_Arab",
    "quy_Latn",
    "ron_Latn",
    "run_Latn",
    "rus_Cyrl",
    "sag_Latn",
    "san_Deva",
    "sat_Olck",
    "scn_Latn",
    "shn_Mymr",
    "sin_Sinh",
    "slk_Latn",
    "slv_Latn",
    "smo_Latn",
    "sna_Latn",
    "snd_Arab",
    "som_Latn",
    "sot_Latn",
    "spa_Latn",
    "als_Latn",
    "srd_Latn",
    "srp_Cyrl",
    "ssw_Latn",
    "sun_Latn",
    "swe_Latn",
    "swh_Latn",
    "szl_Latn",
    "tam_Taml",
    "tat_Cyrl",
    "tel_Telu",
    "tgk_Cyrl",
    "tgl_Latn",
    "tha_Thai",
    "tir_Ethi",
    "taq_Latn",
    "taq_Tfng",
    "tpi_Latn",
    "tsn_Latn",
    "tso_Latn",
    "tuk_Latn",
    "tum_Latn",
    "tur_Latn",
    "twi_Latn",
    "tzm_Tfng",
    "uig_Arab",
    "ukr_Cyrl",
    "umb_Latn",
    "urd_Arab",
    "uzn_Latn",
    "vec_Latn",
    "vie_Latn",
    "war_Latn",
    "wol_Latn",
    "xho_Latn",
    "ydd_Hebr",
    "yor_Latn",
    "yue_Hant",
    "zho_Hans",
    # "zho_Hant",
    "zsm_Latn",
    "zul_Latn",
]


def flores_adapter(lang1, lang2):
    """Build a row adapter for one FLORES source/target language pair.

    Args:
        lang1: FLORES code of the source language (e.g. ``"eng_Latn"``).
        lang2: FLORES code of the target language.

    Returns:
        A callable that takes a dataset row and returns a dict whose
        ``"source_text"`` is the row's ``sentence_<lang1>`` value and whose
        ``"target_text"`` is the row's ``sentence_<lang2>`` value.
    """
    # The column names depend only on the language pair, so build them once
    # here rather than on every row.
    source_column = f"sentence_{lang1}"
    target_column = f"sentence_{lang2}"

    def adapt(line):
        return {
            "source_text": line[source_column],
            "target_text": line[target_column],
        }

    return adapt


# One generative translation task per *ordered* (source, target) language pair.
#
# Translation is directional, so `xxx-yyy` and `yyy-xxx` are different tasks.
# `combinations` yields each unordered pair only once (in list order), which
# would register e.g. `flores200:eng_Latn-fra_Latn` but never
# `flores200:fra_Latn-eng_Latn`. Each pair is therefore emitted in both
# orientations; every task name produced before is still produced.
#
# NOTE(review): this assumes the `facebook/flores` dataset exposes a
# `<lang1>-<lang2>` subset for either ordering of a pair -- confirm against
# the dataset card.
flores200_tasks = [
    LightevalTaskConfig(
        name=f"flores200:{lang1}-{lang2}",
        prompt_function=get_translation_prompt_function(
            # Only the language part of the code (before "_") is mapped to a
            # `Language`; the script suffix is dropped for the prompt.
            source_language=Language(manage_duplicate_language_codes(lang1.split("_")[0])),
            target_language=Language(manage_duplicate_language_codes(lang2.split("_")[0])),
            adapter=flores_adapter(lang1, lang2),
            formulation=CFFormulation(),
        ),
        suite=("lighteval",),
        hf_repo="facebook/flores",
        hf_subset=f"{lang1}-{lang2}",
        hf_avail_splits=["dev", "devtest"],
        evaluation_splits=["devtest"],
        few_shots_split="dev",
        few_shots_select=None,
        generation_size=300,
        metric=[Metrics.chrf_plus, Metrics.bleu, Metrics.bleu_1, Metrics.bleu_4],
        # Generation is cut at the first newline.
        stop_sequence=["\n"],
        trust_dataset=True,
        version=0,
    )
    for first, second in combinations(flores_200_languages, 2)
    for lang1, lang2 in ((first, second), (second, first))
]

# Register every FLORES-200 translation task in the module-level task table.
TASKS_TABLE.extend(flores200_tasks)
Loading
Loading