{"documentation": "Settings that generate the Makefile for the corpus pipeline", "order": ["struct_tag", "struct_tag_simple", "title", "title_sv", "documentation", "type", "properties"], "properties": {"corpus": {"default": "untitled", "description": "The corpus identifier", "description_sv": "Korpusidentifierare", "title": "Corpus name", "title_sv": "Korpusnamn", "type": "string"}, "lang": {"default": "sv", "enum": ["sv"], "title": "Analysis mode", "title_sv": "Analysmode", "type": "string"}, "named_entity_recognition": {"default": [], "description": "Structural attributes for named entity recognition", "description_sv": "Strukturella attribut for namnigenk\u00e4nning", "items": {"enum": ["ex", "type", "subtype"], "title": "Attribute", "title_sv": "Attribut", "type": "string"}, "title": "Named entity recognition", "title_sv": "Namntaggare", "type": "array"}, "order": ["corpus", "lang", "textmode", "word_segmenter", "sentence_segmentation", "paragraph_segmentation", "positional_attributes", "named_entity_recognition", "text_attributes"], "paragraph_segmentation": {"default": {"paragraph_segmenter": "blanklines"}, "description": "Paragraph segmenter to use, or a tag supplied with optional structural attributes", "description_sv": "Styckessegmenterare som ska anv\u00e4ndas, eller en tagg med valfria strukturella attribut", "title": "Paragraph segmentation", "title_sv": "Styckessegmentering", "type": [{"default": "blanklines", "properties": {"paragraph_segmenter": {"class": "typewriter", "default": "blanklines", "description": "Segmenter tool to use for paragraphs", "description_sv": "Modellen/verktyget som ska anv\u00e4ndas till styckessegmenteringen", "enum": ["whitespace", "linebreaks", "blanklines"], "title": "Segment by", "title_sv": "Segmentera med", "type": "string"}}, "title": "Segmenter", "title_sv": "Segmenterare", "type": "object"}, {"default": "none", "description": "Use this if there are no sensible paragraphs in the text", "description_sv": "Anv\u00e4nd 
detta alternativ om det inte finns n\u00e5gra rimliga stycken i texten", "enum": ["none"], "title": "No segmentation", "title_sv": "Ingen segmentering", "type": "string"}]}, "positional_attributes": {"default": {"compound_attributes": ["complemgram", "compwf"], "dependency_attributes": ["ref", "dephead", "deprel"], "lexical_attributes": ["pos", "msd", "lemma", "lex", "sense"], "order": ["lexical_attributes", "compound_attributes", "dependency_attributes"]}, "description": "Positional attributes to generate in the analysis. Attributes already present in the word tag must not appear here again.", "description_sv": "Positionella attribut som ska genereras i analysen. Attribut som har valts under 'ordtagg' f\u00e5r inte f\u00f6rekomma h\u00e4r.", "properties": {"compound_attributes": {"default": ["complemgram", "compwf", "prefix", "suffix"], "description": "Attributes for the compound analysis", "description_sv": "Attribut f\u00f6r sammans\u00e4ttningsanalysen", "items": {"enum": ["complemgram", "compwf", "prefix", "suffix"], "title": "Attribute", "title_sv": "Attribut", "type": "string"}, "title": "Compound analysis", "title_sv": "Sammans\u00e4ttningsanalys", "type": "array"}, "dependency_attributes": {"default": ["ref", "dephead", "deprel"], "description": "Attributes for the dependency analysis", "description_sv": "Attribut f\u00f6r dependensanalysen", "items": {"enum": ["ref", "dephead", "deprel"], "title": "Attribute", "title_sv": "Attribut", "type": "string"}, "title": "Dependency analysis", "title_sv": "Dependensanalys", "type": "array"}, "lexical_attributes": {"default": ["pos", "msd", "lemma", "lex", "sense"], "description": "Attributes for the lexical analysis", "description_sv": "Attribut f\u00f6r lexikalanalysen", "items": {"enum": ["pos", "msd", "lemma", "lex", "sense"], "title": "Attribute", "title_sv": "Attribut", "type": "string"}, "title": "Lexical analysis", "title_sv": "Lexikalanalys", "type": "array"}, "order": ["lexical_attributes", 
"compound_attributes", "dependency_attributes", "sentiment"], "sentiment": {"description": "Attributes for sentiment analysis", "description_sv": "Attribut f\u00f6r attitydanalysen", "items": {"enum": ["sentiment", "sentimentclass"], "title": "Attribute", "title_sv": "Attribut", "type": "string"}, "title": "Sentiment analysis", "title_sv": "Attitydanalys", "type": "array"}}, "title": "Positional attributes", "title_sv": "Positionella attribut", "type": "object"}, "sentence_segmentation": {"default": {"sentence_chunk": "paragraph", "sentence_segmenter": "default_tokenizer"}, "description": "Sentence segmenter to use, or a tag supplied with optional sentence attributes", "description_sv": "Meningssegmenterare som ska anv\u00e4ndas, eller en tagg med valfria menings-attribut", "title": "Sentence segmentation", "title_sv": "Meningssegmentering", "type": [{"default": "default_tokenizer", "properties": {"sentence_chunk": {"default": "paragraph", "description": "Chunk which the sentence segmentation should be based on", "description_sv": "Chunk som meningssegmenteringen ska baseras p\u00e5", "title": "Sentence chunk", "title_sv": "Meningschunk", "type": [{"class": "typewriter", "enum": ["text", "paragraph"], "title": "Pre-defined attribute", "title_sv": "F\u00f6rdefinerad attribut", "type": "string"}]}, "sentence_segmenter": {"class": "typewriter", "default": "default_tokenizer", "description": "Segmenter tool to use for sentences", "description_sv": "Modellen/verktyget som ska anv\u00e4ndas till meningssegmenteringen", "enum": ["whitespace", "linebreaks", "blanklines", "default_tokenizer", "punctuation"], "title": "Segment by", "title_sv": "Segmentera med", "type": "string"}}, "title": "Segmenter", "title_sv": "Segmenterare", "type": "object"}]}, "text_attributes": {"default": {"order": ["readability_metrics"], "readability_metrics": ["lix", "ovix", "nk"]}, "description": "Text attributes to generate in the analysis. 
Attributes already present under 'Document element > Structural attributes' must not appear here again.", "description_sv": "Textattribut som ska genereras i analysen. Attribut som har valts under 'Dokumentelement > Strukturella attribut' f\u00e5r inte f\u00f6rekomma h\u00e4r.", "properties": {"order": ["readability_metrics"], "readability_metrics": {"default": ["lix", "ovix", "nk"], "description": "Attributes for different readability metrics", "description_sv": "Attribut f\u00f6r olika sorters l\u00e4sbarhetsv\u00e4rden", "items": {"enum": ["lix", "ovix", "nk"], "title": "Attribute", "title_sv": "Attribut", "type": "string"}, "title": "Readability metrics", "title_sv": "L\u00e4sbarhetsv\u00e4rden", "type": "array"}}, "title": "Text attributes", "title_sv": "Textattribut", "type": "object"}, "textmode": {"default": "plain", "description": "Input format for the pipeline", "description_sv": "Indataformatet till importkedjan", "enum": ["plain"], "title": "Text input mode", "title_sv": "Textl\u00e4ge", "type": "string"}, "word_segmenter": {"default": "default_tokenizer", "description": "Word segmenter to use, or a tag optionally supplied with positional attributes", "description_sv": "Ordsegmenterare som ska anv\u00e4ndas, eller en tagg med valfria positionella attribut", "title": "Word segmentation", "title_sv": "Ordsegmentering", "type": [{"class": "typewriter", "description": "Segmenter tool to use for words", "description_sv": "Modellen/verktyget som ska anv\u00e4ndas till ordsegmenteringen", "enum": ["whitespace", "linebreaks", "blanklines", "default_tokenizer"], "title": "Segmenter", "title_sv": "Segmenterare", "type": "string"}]}}, "struct_tag": {"description": "A tag for a structural element", "description_sv": "En tagg f\u00f6r ett strukturellt element", "properties": {"attributes": {"default": [], "description": "The attributes of this tag in the XML", "description_sv": "Attribut inom den taggen som ska f\u00e5ngas in fr\u00e5n XML:en", "items": {"title": 
"Structural attribute", "title_sv": "Strukturellt attribut", "type": "string"}, "title": "Structural attributes", "title_sv": "Strukturella attribut", "type": "array"}, "order": ["tag", "attributes"], "tag": {"description": "The identifer of the tag in the XML", "description_sv": "Taggens identifierare i XML:en", "title": "Tag", "title_sv": "Tagg", "type": "string"}}, "title": "Structural element tag", "title_sv": "Strukturellt-element-tagg", "type": "object"}, "struct_tag_simple": {"description": "Attribute tag which segmentation is based on", "description_sv": "Attribut-tagg som segmenteringen \u00e4r baserad p\u00e5", "title": "Costum attribute tag", "title_sv": "Egen attribut-tagg", "type": "string"}, "title": "Corpus pipeline makefile settings", "title_sv": "Makefilsinst\u00e4llningar till korpusimportkedjan", "type": "object"}