fix methods and ndjson
This commit is contained in:
@@ -357,6 +357,25 @@ final class NdjsonHybridRetrieverConfig
|
|||||||
{
|
{
|
||||||
return $this->stringList('looks_like_device_words', self::LOOKS_LIKE_DEVICE_WORDS);
|
return $this->stringList('looks_like_device_words', self::LOOKS_LIKE_DEVICE_WORDS);
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Effective retrieval vocabulary as a dedicated structure for diagnostics and config dumps.
|
||||||
|
*
|
||||||
|
* @return array<string, array<int, string>>
|
||||||
|
*/
|
||||||
|
public function vocabularyToArray(): array
|
||||||
|
{
|
||||||
|
return [
|
||||||
|
'generic_product_tokens' => $this->genericProductTokens(),
|
||||||
|
'important_short_model_tokens' => $this->importantShortModelTokens(),
|
||||||
|
'family_descriptor_tokens' => $this->familyDescriptorTokens(),
|
||||||
|
'looks_like_reagent_tokens' => $this->looksLikeReagentTokens(),
|
||||||
|
'looks_like_safety_docs' => $this->looksLikeSafetyDocs(),
|
||||||
|
'looks_like_reagent_words' => $this->looksLikeReagentWords(),
|
||||||
|
'looks_like_document_words' => $this->looksLikeDocumentWords(),
|
||||||
|
'looks_like_safety_words' => $this->looksLikeSafetyWords(),
|
||||||
|
'looks_like_device_words' => $this->looksLikeDeviceWords(),
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return array<string, mixed>
|
* @return array<string, mixed>
|
||||||
|
|||||||
@@ -1,329 +1,19 @@
|
|||||||
{
|
{"id":"retrieval_exact_doc_001","type":"retrieval","prompt":"Testomat 808","assert":{"selection_mode_in":["exact_document_title"],"min_results":1,"must_include_one_of_document_ids":["26129c01-c09f-4c71-9c80-7ddffb6c77fb"],"must_include_any_terms":["testomat 808","0,02 °dh","indikator"]}}
|
||||||
"id": "retrieval_exact_doc_001",
|
{"id":"retrieval_exact_doc_002","type":"retrieval","prompt":"Testomat EVO CALC","assert":{"selection_mode_in":["exact_document_title"],"min_results":1,"must_include_one_of_document_ids":["74fdad85-5e4e-4f08-8d95-402f3180ed55"],"must_include_any_terms":["evo calc","kalibrier","wasserhärte"]}}
|
||||||
"type": "retrieval",
|
{"id":"retrieval_exact_doc_003","type":"retrieval","prompt":"Testomat ECO PLUS","assert":{"selection_mode_in":["exact_document_title"],"min_results":1,"must_include_one_of_document_ids":["bace47f9-647e-4d47-95d9-118e553c6e5a"],"must_include_any_terms":["eco-plus","intervall","liter"]}}
|
||||||
"prompt": "Testomat 808",
|
{"id":"retrieval_exact_doc_004","type":"retrieval","prompt":"Testomat ECO","assert":{"selection_mode_in":["exact_document_title"],"min_results":1,"must_include_one_of_document_ids":["3d6c2add-c643-4e96-a3e7-5eb949c41303"],"must_include_any_terms":["testomat eco","intervall","spülzeit"]}}
|
||||||
"assert": {
|
{"id":"retrieval_exact_doc_005","type":"retrieval","prompt":"Testomat EVO TH","assert":{"selection_mode_in":["exact_document_title"],"min_results":1,"must_include_one_of_document_ids":["eb91c1be-4546-4ed5-8b01-f075519d675b"],"must_include_any_terms":["evo th","online-analysenautomat","digitale eingänge"]}}
|
||||||
"selection_mode_in": [
|
{"id":"retrieval_exact_doc_006","type":"retrieval","prompt":"Wasserhärte Grenzwert Testomat","assert":{"selection_mode_in":["exact_document_title"],"min_results":1,"must_include_one_of_document_ids":["60706498-867b-41b8-8e76-63248178d265"],"must_include_any_terms":["grenzwert","0,02 °dh","0,05 °dh"]}}
|
||||||
"exact_document_title"
|
{"id":"retrieval_semantic_001","type":"retrieval","prompt":"welche testomat geraete gibt es","assert":{"min_results":1,"must_include_one_of_document_ids":["cf6a1ff2-8afe-4ebe-951b-805d7324d0a3","26129c01-c09f-4c71-9c80-7ddffb6c77fb","74fdad85-5e4e-4f08-8d95-402f3180ed55"],"must_include_any_terms":["testomatwelt","geräteübersicht","wasserhärte"]}}
|
||||||
],
|
{"id":"retrieval_semantic_002","type":"retrieval","prompt":"welches testomat modell ist fuer hohe wasserhaerte geeignet","assert":{"min_results":1,"must_include_one_of_document_ids":["eb91c1be-4546-4ed5-8b01-f075519d675b","74fdad85-5e4e-4f08-8d95-402f3180ed55","26129c01-c09f-4c71-9c80-7ddffb6c77fb","60706498-867b-41b8-8e76-63248178d265"],"must_include_any_terms":["wasserhärte","grenzwert","testomat"]}}
|
||||||
"min_results": 1,
|
{"id":"retrieval_semantic_003","type":"retrieval","prompt":"welche testomat indikatoren gibt es","assert":{"min_results":1,"must_include_one_of_document_ids":["8db60a9f-3549-4567-b914-5e3d0d9ef715","f0422ac8-3d60-4b6c-ab97-8eba652d9eb3","5ced4bcb-aa9d-4032-9eee-37a33f744476","a9fedf75-bccc-4100-ac59-b6f4eef01e61","d11948da-4e77-48e3-bab2-d32f622343de"],"must_include_any_terms":["indikator","th 2250","th 2005","tc 2050","tc 2100"]}}
|
||||||
"must_include_one_of_document_ids": [
|
{"id":"retrieval_semantic_004","type":"retrieval","prompt":"welcher testomat ist ein verschneideregler","assert":{"min_results":1,"must_include_one_of_document_ids":["7fe9342f-2ca4-41ce-bdea-410b516ef6b4"],"must_include_any_terms":["verschneideregler","motorventil","0/4–20 ma"]}}
|
||||||
"26129c01-c09f-4c71-9c80-7ddffb6c77fb"
|
{"id":"retrieval_semantic_005","type":"retrieval","prompt":"welcher testomat hat automatische reinigung","assert":{"min_results":1,"must_include_one_of_document_ids":["b8c3343b-931e-4994-9d53-a2130efc846f","51589532-a1a1-46e0-94b2-a139dce78543"],"must_include_any_terms":["self clean","reinigung","messkammer"]}}
|
||||||
],
|
{"id":"retrieval_semantic_006","type":"retrieval","prompt":"welches geraet ist fuer chlorueberwachung gedacht","assert":{"min_results":1,"must_include_one_of_document_ids":["1d467913-a2d9-42e6-8510-83a65aba9403"],"must_include_any_terms":["thcl","chlor","online-analysegerät"]}}
|
||||||
"must_include_any_terms": [
|
{"id":"retrieval_semantic_007","type":"retrieval","prompt":"gibt es ein kompaktes kosteneffizientes haerteueberwachungsgeraet","assert":{"min_results":1,"must_include_one_of_document_ids":["afcf1cd6-9b02-4828-b11f-339096a3c864","3d6c2add-c643-4e96-a3e7-5eb949c41303"],"must_include_any_terms":["eco c","kosteneffizient","härteüberwachung"]}}
|
||||||
"testomat 808",
|
{"id":"retrieval_negative_001","type":"retrieval","prompt":"lieferbedingungen versand testomat","assert":{"min_results":1,"must_include_one_of_document_ids":["26ddf03d-9108-4a65-aa0e-a5df7613fa77"],"must_not_include_document_ids":["7166592f-85f2-425c-997b-73e323ae184d"]}}
|
||||||
"0,02 °dh",
|
{"id":"retrieval_negative_002","type":"retrieval","prompt":"testomat 2000 th 2005 sicherheitsdatenblatt","assert":{"min_results":1,"must_include_one_of_document_ids":["f0422ac8-3d60-4b6c-ab97-8eba652d9eb3","e3d05954-cde3-40bc-baf6-aa9a350a8aa2"],"must_not_include_document_ids":["26129c01-c09f-4c71-9c80-7ddffb6c77fb","74fdad85-5e4e-4f08-8d95-402f3180ed55"]}}
|
||||||
"indikator"
|
{"id":"retrieval_negative_003","type":"retrieval","prompt":"testomat 2000 self clean reinigungsloesung","assert":{"min_results":1,"must_include_one_of_document_ids":["51589532-a1a1-46e0-94b2-a139dce78543","b8c3343b-931e-4994-9d53-a2130efc846f"],"must_include_any_terms":["reinigungslösung","self clean"],"must_not_include_document_ids":["26129c01-c09f-4c71-9c80-7ddffb6c77fb"]}}
|
||||||
]
|
{"id":"retrieval_short_001","type":"retrieval","prompt":"evo th","assert":{"min_results":1,"must_include_one_of_document_ids":["eb91c1be-4546-4ed5-8b01-f075519d675b","74fdad85-5e4e-4f08-8d95-402f3180ed55"],"must_include_any_terms":["evo"]}}
|
||||||
}
|
{"id":"retrieval_short_002","type":"retrieval","prompt":"808","assert":{"min_results":1,"must_include_one_of_document_ids":["26129c01-c09f-4c71-9c80-7ddffb6c77fb"],"must_include_any_terms":["808"]}}
|
||||||
}
|
{"id":"retrieval_noise_001","type":"retrieval","prompt":"dsgfsdgfsdgf","assert":{"max_results":0}}
|
||||||
{
|
|
||||||
"id": "retrieval_exact_doc_002",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "Testomat EVO CALC",
|
|
||||||
"assert": {
|
|
||||||
"selection_mode_in": [
|
|
||||||
"exact_document_title"
|
|
||||||
],
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"74fdad85-5e4e-4f08-8d95-402f3180ed55"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"evo calc",
|
|
||||||
"kalibrier",
|
|
||||||
"wasserhärte"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_exact_doc_003",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "Testomat ECO PLUS",
|
|
||||||
"assert": {
|
|
||||||
"selection_mode_in": [
|
|
||||||
"exact_document_title"
|
|
||||||
],
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"bace47f9-647e-4d47-95d9-118e553c6e5a"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"eco-plus",
|
|
||||||
"intervall",
|
|
||||||
"liter"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_exact_doc_004",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "Testomat ECO",
|
|
||||||
"assert": {
|
|
||||||
"selection_mode_in": [
|
|
||||||
"exact_document_title"
|
|
||||||
],
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"3d6c2add-c643-4e96-a3e7-5eb949c41303"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"testomat eco",
|
|
||||||
"intervall",
|
|
||||||
"spülzeit"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_exact_doc_005",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "Testomat EVO TH",
|
|
||||||
"assert": {
|
|
||||||
"selection_mode_in": [
|
|
||||||
"exact_document_title"
|
|
||||||
],
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"eb91c1be-4546-4ed5-8b01-f075519d675b"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"evo th",
|
|
||||||
"online-analysenautomat",
|
|
||||||
"digitale eingänge"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_exact_doc_006",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "Wasserhärte Grenzwert Testomat",
|
|
||||||
"assert": {
|
|
||||||
"selection_mode_in": [
|
|
||||||
"exact_document_title"
|
|
||||||
],
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"60706498-867b-41b8-8e76-63248178d265"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"grenzwert",
|
|
||||||
"0,02 °dh",
|
|
||||||
"0,05 °dh"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
"id": "retrieval_semantic_001",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "welche testomat geraete gibt es",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"cf6a1ff2-8afe-4ebe-951b-805d7324d0a3",
|
|
||||||
"26129c01-c09f-4c71-9c80-7ddffb6c77fb",
|
|
||||||
"74fdad85-5e4e-4f08-8d95-402f3180ed55"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"testomatwelt",
|
|
||||||
"geräteübersicht",
|
|
||||||
"wasserhärte"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_semantic_002",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "welches testomat modell ist fuer hohe wasserhaerte geeignet",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"eb91c1be-4546-4ed5-8b01-f075519d675b",
|
|
||||||
"74fdad85-5e4e-4f08-8d95-402f3180ed55",
|
|
||||||
"26129c01-c09f-4c71-9c80-7ddffb6c77fb",
|
|
||||||
"60706498-867b-41b8-8e76-63248178d265"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"wasserhärte",
|
|
||||||
"grenzwert",
|
|
||||||
"testomat"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_semantic_003",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "welche testomat indikatoren gibt es",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"8db60a9f-3549-4567-b914-5e3d0d9ef715",
|
|
||||||
"f0422ac8-3d60-4b6c-ab97-8eba652d9eb3",
|
|
||||||
"5ced4bcb-aa9d-4032-9eee-37a33f744476",
|
|
||||||
"a9fedf75-bccc-4100-ac59-b6f4eef01e61",
|
|
||||||
"d11948da-4e77-48e3-bab2-d32f622343de"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"indikator",
|
|
||||||
"th 2250",
|
|
||||||
"th 2005",
|
|
||||||
"tc 2050",
|
|
||||||
"tc 2100"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_semantic_004",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "welcher testomat ist ein verschneideregler",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"7fe9342f-2ca4-41ce-bdea-410b516ef6b4"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"verschneideregler",
|
|
||||||
"motorventil",
|
|
||||||
"0/4–20 ma"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_semantic_005",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "welcher testomat hat automatische reinigung",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"b8c3343b-931e-4994-9d53-a2130efc846f",
|
|
||||||
"51589532-a1a1-46e0-94b2-a139dce78543"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"self clean",
|
|
||||||
"reinigung",
|
|
||||||
"messkammer"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_semantic_006",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "welches geraet ist fuer chlorueberwachung gedacht",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"1d467913-a2d9-42e6-8510-83a65aba9403"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"thcl",
|
|
||||||
"chlor",
|
|
||||||
"online-analysegerät"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_semantic_007",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "gibt es ein kompaktes kosteneffizientes haerteueberwachungsgeraet",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"afcf1cd6-9b02-4828-b11f-339096a3c864",
|
|
||||||
"3d6c2add-c643-4e96-a3e7-5eb949c41303"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"eco c",
|
|
||||||
"kosteneffizient",
|
|
||||||
"härteüberwachung"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
"id": "retrieval_negative_001",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "lieferbedingungen versand testomat",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"26ddf03d-9108-4a65-aa0e-a5df7613fa77"
|
|
||||||
],
|
|
||||||
"must_not_include_document_ids": [
|
|
||||||
"7166592f-85f2-425c-997b-73e323ae184d"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_negative_002",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "testomat 2000 th 2005 sicherheitsdatenblatt",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"f0422ac8-3d60-4b6c-ab97-8eba652d9eb3",
|
|
||||||
"e3d05954-cde3-40bc-baf6-aa9a350a8aa2"
|
|
||||||
],
|
|
||||||
"must_not_include_document_ids": [
|
|
||||||
"26129c01-c09f-4c71-9c80-7ddffb6c77fb",
|
|
||||||
"74fdad85-5e4e-4f08-8d95-402f3180ed55"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_negative_003",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "testomat 2000 self clean reinigungsloesung",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"51589532-a1a1-46e0-94b2-a139dce78543",
|
|
||||||
"b8c3343b-931e-4994-9d53-a2130efc846f"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"reinigungslösung",
|
|
||||||
"self clean"
|
|
||||||
],
|
|
||||||
"must_not_include_document_ids": [
|
|
||||||
"26129c01-c09f-4c71-9c80-7ddffb6c77fb"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
"id": "retrieval_short_001",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "evo th",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"eb91c1be-4546-4ed5-8b01-f075519d675b",
|
|
||||||
"74fdad85-5e4e-4f08-8d95-402f3180ed55"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"evo"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "retrieval_short_002",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "808",
|
|
||||||
"assert": {
|
|
||||||
"min_results": 1,
|
|
||||||
"must_include_one_of_document_ids": [
|
|
||||||
"26129c01-c09f-4c71-9c80-7ddffb6c77fb"
|
|
||||||
],
|
|
||||||
"must_include_any_terms": [
|
|
||||||
"808"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
"id": "retrieval_noise_001",
|
|
||||||
"type": "retrieval",
|
|
||||||
"prompt": "dsgfsdgfsdgf",
|
|
||||||
"assert": {
|
|
||||||
"max_results": 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user