Skip to content

Commit 63be4b0

Browse files
authored
Added Flores 200 (#717)
* Added the relevant languages and sorted all of them * Added chrF++ * Updated the language list to remove redundancies and renamed the task to follow the usual naming pattern * Fixed the adapter and simplified language handling; in the future we might want a custom enum in which one key maps to several values
1 parent d18f11a commit 63be4b0

File tree

5 files changed

+586
-86
lines changed

5 files changed

+586
-86
lines changed

src/lighteval/metrics/metrics.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,14 @@ class Metrics(Enum):
158158
corpus_level_fn=CorpusLevelTranslationMetric("chrf").compute,
159159
higher_is_better=True,
160160
)
161+
chrf_plus = CorpusLevelMetric(
162+
metric_name="chrf++",
163+
sample_level_fn=GenerativePreparator().prepare,
164+
category=MetricCategory.GENERATIVE,
165+
use_case=MetricUseCase.TRANSLATION,
166+
corpus_level_fn=CorpusLevelTranslationMetric("chrf++").compute,
167+
higher_is_better=True,
168+
)
161169
copyright = SampleLevelMetricGrouping(
162170
metric_name=["longest_common_prefix_length", "edit_distance", "edit_similarity"],
163171
sample_level_fn=StringDistance(

src/lighteval/metrics/metrics_corpus.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ def get_metric(self):
104104
return sacrebleu.BLEU(trg_lang=self.lang)
105105
elif self.metric_type == "chrf":
106106
return sacrebleu.CHRF()
107+
elif self.metric_type == "chrf++":
108+
return sacrebleu.CHRF(word_order=2)
107109
elif self.metric_type == "ter":
108110
return sacrebleu.TER(asian_support=True if self.lang != "" else False)
109111
else:

src/lighteval/tasks/multilingual/tasks.py

Lines changed: 251 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# SOFTWARE.
2222

2323
from functools import partial
24+
from itertools import combinations
2425

2526
from langcodes import Language as LangCodeLanguage
2627
from langcodes import standardize_tag
@@ -30,6 +31,7 @@
3031
multilingual_quasi_exact_match_metric,
3132
multilingual_quasi_f1_score_metric,
3233
)
34+
from lighteval.metrics.metrics import Metrics
3335
from lighteval.metrics.normalizations import LogProbCharNorm, LogProbPMINorm, LogProbTokenNorm
3436
from lighteval.tasks.default_prompts import LETTER_INDICES
3537
from lighteval.tasks.lighteval_task import LightevalTaskConfig
@@ -52,13 +54,14 @@
5254
from lighteval.tasks.templates.multichoice import get_mcq_prompt_function
5355
from lighteval.tasks.templates.nli import get_nli_prompt_function
5456
from lighteval.tasks.templates.qa import get_qa_prompt_function
57+
from lighteval.tasks.templates.translation import get_translation_prompt_function
5558
from lighteval.tasks.templates.utils.formulation import (
5659
CFFormulation,
5760
HybridFormulation,
5861
MCFFormulation,
5962
)
6063
from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS
61-
from lighteval.utils.language import Language, iso_639_3_ind_to_iso_639_3_macro
64+
from lighteval.utils.language import Language, iso_639_3_ind_to_iso_639_3_macro, manage_duplicate_language_codes
6265

6366

6467
TASKS_TABLE = []
@@ -3903,3 +3906,250 @@
39033906
*hindi_boolq_tasks,
39043907
]
39053908
)
3909+
3910+
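# ------------------------------- Translation Tasks ------------------------------- #
# FLORES-200 language identifiers, written as "<language code>_<script code>" (e.g. "eng_Latn").
# Each commented-out entry is an alternate script of a language whose other script is still listed.
# NOTE(review): "taq_Latn" and "taq_Tfng" are both active, unlike the other multi-script
# languages in this list - confirm that keeping both is intentional.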
3911+
flores_200_languages = [
3912+
# "ace_Arab",
3913+
"ace_Latn",
3914+
"acm_Arab",
3915+
"acq_Arab",
3916+
"aeb_Arab",
3917+
"afr_Latn",
3918+
"ajp_Arab",
3919+
"aka_Latn",
3920+
"amh_Ethi",
3921+
"apc_Arab",
3922+
"arb_Arab",
3923+
# "arb_Latn",
3924+
"ars_Arab",
3925+
"ary_Arab",
3926+
"arz_Arab",
3927+
"asm_Beng",
3928+
"ast_Latn",
3929+
"awa_Deva",
3930+
"ayr_Latn",
3931+
"azb_Arab",
3932+
"azj_Latn",
3933+
"bak_Cyrl",
3934+
"bam_Latn",
3935+
"ban_Latn",
3936+
"bel_Cyrl",
3937+
"bem_Latn",
3938+
"ben_Beng",
3939+
"bho_Deva",
3940+
# "bjn_Arab",
3941+
"bjn_Latn",
3942+
"bod_Tibt",
3943+
"bos_Latn",
3944+
"bug_Latn",
3945+
"bul_Cyrl",
3946+
"cat_Latn",
3947+
"ceb_Latn",
3948+
"ces_Latn",
3949+
"cjk_Latn",
3950+
"ckb_Arab",
3951+
"crh_Latn",
3952+
"cym_Latn",
3953+
"dan_Latn",
3954+
"deu_Latn",
3955+
"dik_Latn",
3956+
"dyu_Latn",
3957+
"dzo_Tibt",
3958+
"ell_Grek",
3959+
"eng_Latn",
3960+
"epo_Latn",
3961+
"est_Latn",
3962+
"eus_Latn",
3963+
"ewe_Latn",
3964+
"fao_Latn",
3965+
"fij_Latn",
3966+
"fin_Latn",
3967+
"fon_Latn",
3968+
"fra_Latn",
3969+
"fur_Latn",
3970+
"fuv_Latn",
3971+
"gla_Latn",
3972+
"gle_Latn",
3973+
"glg_Latn",
3974+
"grn_Latn",
3975+
"guj_Gujr",
3976+
"hat_Latn",
3977+
"hau_Latn",
3978+
"heb_Hebr",
3979+
"hin_Deva",
3980+
"hne_Deva",
3981+
"hrv_Latn",
3982+
"hun_Latn",
3983+
"hye_Armn",
3984+
"ibo_Latn",
3985+
"ilo_Latn",
3986+
"ind_Latn",
3987+
"isl_Latn",
3988+
"ita_Latn",
3989+
"jav_Latn",
3990+
"jpn_Jpan",
3991+
"kab_Latn",
3992+
"kac_Latn",
3993+
"kam_Latn",
3994+
"kan_Knda",
3995+
# "kas_Arab",
3996+
"kas_Deva",
3997+
"kat_Geor",
3998+
# "knc_Arab",
3999+
"knc_Latn",
4000+
"kaz_Cyrl",
4001+
"kbp_Latn",
4002+
"kea_Latn",
4003+
"khm_Khmr",
4004+
"kik_Latn",
4005+
"kin_Latn",
4006+
"kir_Cyrl",
4007+
"kmb_Latn",
4008+
"kmr_Latn",
4009+
"kon_Latn",
4010+
"kor_Hang",
4011+
"lao_Laoo",
4012+
"lij_Latn",
4013+
"lim_Latn",
4014+
"lin_Latn",
4015+
"lit_Latn",
4016+
"lmo_Latn",
4017+
"ltg_Latn",
4018+
"ltz_Latn",
4019+
"lua_Latn",
4020+
"lug_Latn",
4021+
"luo_Latn",
4022+
"lus_Latn",
4023+
"lvs_Latn",
4024+
"mag_Deva",
4025+
"mai_Deva",
4026+
"mal_Mlym",
4027+
"mar_Deva",
4028+
# "min_Arab",
4029+
"min_Latn",
4030+
"mkd_Cyrl",
4031+
"plt_Latn",
4032+
"mlt_Latn",
4033+
"mni_Beng",
4034+
"khk_Cyrl",
4035+
"mos_Latn",
4036+
"mri_Latn",
4037+
"mya_Mymr",
4038+
"nld_Latn",
4039+
"nno_Latn",
4040+
"nob_Latn",
4041+
"npi_Deva",
4042+
"nso_Latn",
4043+
"nus_Latn",
4044+
"nya_Latn",
4045+
"oci_Latn",
4046+
"gaz_Latn",
4047+
"ory_Orya",
4048+
"pag_Latn",
4049+
"pan_Guru",
4050+
"pap_Latn",
4051+
"pes_Arab",
4052+
"pol_Latn",
4053+
"por_Latn",
4054+
"prs_Arab",
4055+
"pbt_Arab",
4056+
"quy_Latn",
4057+
"ron_Latn",
4058+
"run_Latn",
4059+
"rus_Cyrl",
4060+
"sag_Latn",
4061+
"san_Deva",
4062+
"sat_Olck",
4063+
"scn_Latn",
4064+
"shn_Mymr",
4065+
"sin_Sinh",
4066+
"slk_Latn",
4067+
"slv_Latn",
4068+
"smo_Latn",
4069+
"sna_Latn",
4070+
"snd_Arab",
4071+
"som_Latn",
4072+
"sot_Latn",
4073+
"spa_Latn",
4074+
"als_Latn",
4075+
"srd_Latn",
4076+
"srp_Cyrl",
4077+
"ssw_Latn",
4078+
"sun_Latn",
4079+
"swe_Latn",
4080+
"swh_Latn",
4081+
"szl_Latn",
4082+
"tam_Taml",
4083+
"tat_Cyrl",
4084+
"tel_Telu",
4085+
"tgk_Cyrl",
4086+
"tgl_Latn",
4087+
"tha_Thai",
4088+
"tir_Ethi",
4089+
"taq_Latn",
4090+
"taq_Tfng",
4091+
"tpi_Latn",
4092+
"tsn_Latn",
4093+
"tso_Latn",
4094+
"tuk_Latn",
4095+
"tum_Latn",
4096+
"tur_Latn",
4097+
"twi_Latn",
4098+
"tzm_Tfng",
4099+
"uig_Arab",
4100+
"ukr_Cyrl",
4101+
"umb_Latn",
4102+
"urd_Arab",
4103+
"uzn_Latn",
4104+
"vec_Latn",
4105+
"vie_Latn",
4106+
"war_Latn",
4107+
"wol_Latn",
4108+
"xho_Latn",
4109+
"ydd_Hebr",
4110+
"yor_Latn",
4111+
"yue_Hant",
4112+
"zho_Hans",
4113+
# "zho_Hant",
4114+
"zsm_Latn",
4115+
"zul_Latn",
4116+
]
4117+
4118+
4119+
def flores_adapter(lang1, lang2):
    """Build a row adapter for one FLORES translation direction.

    Args:
        lang1: Identifier of the source language; the row field read is ``sentence_<lang1>``.
        lang2: Identifier of the target language; the row field read is ``sentence_<lang2>``.

    Returns:
        A callable taking a dataset row (any mapping supporting ``row[key]``) and returning
        a dict with the keys ``source_text`` and ``target_text``.
    """
    source_field = f"sentence_{lang1}"
    target_field = f"sentence_{lang2}"

    def adapt(line):
        # Plain indexing on purpose: a row missing either field raises KeyError.
        return {
            "source_text": line[source_field],
            "target_text": line[target_field],
        }

    return adapt
4124+
4125+
4126+
def _flores200_task_config(src, tgt):
    """Create the FLORES-200 translation task config for the pair ``src`` -> ``tgt``.

    Both arguments are entries of ``flores_200_languages``. The full identifier is used
    for the task name, the dataset subset and the row adapter, while only the part
    before the first underscore is used to resolve the ``Language`` enum member.
    """
    # Strip the script suffix, then let manage_duplicate_language_codes map the bare code
    # before the enum lookup (presumably to a code the Language enum knows - verify in
    # lighteval.utils.language).
    source_language = Language(manage_duplicate_language_codes(src.split("_")[0]))
    target_language = Language(manage_duplicate_language_codes(tgt.split("_")[0]))

    translation_prompt = get_translation_prompt_function(
        source_language=source_language,
        target_language=target_language,
        adapter=flores_adapter(src, tgt),
        formulation=CFFormulation(),
    )

    return LightevalTaskConfig(
        name=f"flores200:{src}-{tgt}",
        prompt_function=translation_prompt,
        suite=("lighteval",),
        hf_repo="facebook/flores",
        hf_subset=f"{src}-{tgt}",
        hf_avail_splits=["dev", "devtest"],
        # Few-shot examples come from "dev"; scoring is done on "devtest".
        evaluation_splits=["devtest"],
        few_shots_split="dev",
        few_shots_select=None,
        generation_size=300,
        metric=[Metrics.chrf_plus, Metrics.bleu, Metrics.bleu_1, Metrics.bleu_4],
        stop_sequence=["\n"],
        trust_dataset=True,
        version=0,
    )


# One task per unordered pair of languages, in list order.
# NOTE(review): combinations() yields each pair once, so only the lang1 -> lang2 direction
# (lang1 appearing earlier in flores_200_languages) is registered - confirm that the
# reverse direction is intentionally left out.
flores200_tasks = [
    _flores200_task_config(first, second) for first, second in combinations(flores_200_languages, 2)
]

# Register the translation tasks alongside the other multilingual tasks.
TASKS_TABLE.extend(
    [
        *flores200_tasks,
    ]
)

0 commit comments

Comments
 (0)