"""
30. Reading morphological analysis results
Implement a program that reads the morphological analysis result (neko.txt.mecab).
Each morpheme is stored in a mapping type keyed by surface form (surface), base form (base), part of speech (pos), and part-of-speech subclassification 1 (pos1).
Express one sentence as a list of morphemes (mapping type). For the rest of the problems in Chapter 4, use the program created here.
- `neko.txt.mecab` is created by `ans30.sh`.
- Later questions use `30_neko_mecab.json`.
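MeCab feature fields, in order: part of speech, part-of-speech subclassification 1, part-of-speech subclassification 2, part-of-speech subclassification 3, conjugation type, conjugation form, base form, reading, pronunciation
Example (translated): ['noun', 'pronoun', 'general', '*', '*', '*', 'I', 'wagahai', 'wagahai']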
"""
from typing import List
import MeCab
import utils
def read_file(path: str) -> List[str]:
    """Read a text file and return its non-blank lines.

    Every line has leading and trailing whitespace stripped; lines that
    are empty after stripping are dropped.

    Args:
        path (str): Path of the text file to read.

    Returns:
        List[str]: The stripped, non-empty lines in file order.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding,
    # so the result does not depend on the machine running the script.
    # NOTE(review): assumes the input file is UTF-8 encoded - confirm.
    with open(path, encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]
def parse(sent: str) -> List[dict]:
    """Analyse one sentence with MeCab and return its morphemes.

    Walks the node chain returned by the module-level ``tagger`` and turns
    every node into a dict with the keys ``surface``, ``base``, ``pos`` and
    ``pos1``.

    Args:
        sent (str): Sentence to analyse.

    Returns:
        List[dict]: One dict per node, in node order.
    """
    # Feature fields are read by position: 0 = pos, 1 = pos1, 6 = base.
    required_fields = 7
    morphemes = []
    # NOTE(review): every node in the chain is kept, which presumably
    # includes MeCab's BOS/EOS sentinel nodes - confirm downstream wants them.
    node = tagger.parseToNode(sent)
    while node:
        features = node.feature.split(",")
        # Pad short feature strings with "*" (the placeholder used for empty
        # fields) so the positional lookups below cannot raise IndexError.
        features += ["*"] * (required_fields - len(features))
        morphemes.append(
            {
                "surface": node.surface,  # surface form
                "base": features[6],  # base (uninflected) form
                "pos": features[0],  # part of speech
                "pos1": features[1],  # part-of-speech subclassification 1
            }
        )
        node = node.next
    return morphemes
# Input corpus; each non-blank line is treated as one sentence below.
file_path = "neko.txt"
data = read_file(file_path)
# e.g. ['one', 'I am a cat.', 'There is no name yet.', 'I have no idea where I was born.']
# Module-level tagger: parse() looks this name up when it is called, so it
# has to be bound before the first parse() call.
tagger = MeCab.Tagger("-r /usr/local/etc/mecabrc")
result = list(map(parse, data))
# ans30: store the analysed sentences as JSON, then load them back from disk.
utils.save_json(result, "30_neko_mecab.json")
data = utils.read_json("30_neko_mecab.json")
# utils.py:
import itertools
import json
from typing import Any, List
def save_json(data: Any, save_path: str) -> None:
    """Write ``data`` to ``save_path`` as UTF-8 encoded JSON.

    The output is indented by two spaces and non-ASCII characters are
    written as-is rather than as ``\\uXXXX`` escapes.

    Args:
        data (Any): The object to serialise.
        save_path (str): Destination file path; opened in write mode.
    """
    dump_options = {"indent": 2, "ensure_ascii": False}
    with open(save_path, mode="w", encoding="utf-8") as out:
        json.dump(data, out, **dump_options)
def read_json(path: str) -> List[Any]:
    """Load and return the JSON document stored at ``path``.

    Args:
        path (str): Path of the JSON file to read; decoded as UTF-8.

    Returns:
        List[Any]: Whatever ``json.load`` produces for the file's content.
    """
    with open(path, encoding="utf-8") as src:
        return json.load(src)
def flat(sequence: List[List[Any]]) -> List[Any]:
    """Concatenate the inner lists of ``sequence`` into a single list.

    Only one level of nesting is removed; element order is preserved.

    Args:
        sequence (List[List[Any]]): The lists to join.

    Returns:
        List[Any]: All elements of the inner lists, in order.
    """
    return [item for inner in sequence for item in inner]
        Recommended Posts