The other day I learned about 100 Days Of Code, which was popular on Twitter for a while. The purpose of this article is to keep a record of, and publish, how much I can grow as a beginner through 100 days of study. I expect there are many mistakes and passages that are hard to read, so I would appreciate it if you could point them out!
--Progress: Pages 64-69 --Chapter 3: Classes and Inheritance --I will write down the things I learned today that I often forget or did not know.
--Polymorphism is a way for multiple classes in a hierarchy to each implement their own version of a method.
For example, suppose you're writing an implementation of MapReduce and want a common class that represents your input data. Define a common class with a read method that must be implemented by subclasses, as follows. (Here, MapReduce is a programming model for processing a large amount of partitioned data in a distributed manner across a cluster.)
class InputData:
    """Common interface for a source of MapReduce input data.

    Subclasses are expected to override ``read``.
    """

    def read(self):
        """Return the input data; the base implementation always raises."""
        raise NotImplementedError()
Inherit from the InputData class to define a subclass:
class PathInputData(InputData):
    """Input data that is read from a file on disk."""

    def __init__(self, path):
        """Remember the path of the file to read.

        Args:
            path: Filesystem path handed to ``open`` when ``read`` is called.
        """
        # InputData defines no __init__ of its own, so this call currently
        # resolves to object.__init__ and does nothing. It is kept so the
        # class keeps working if a base class later gains an initializer.
        super().__init__()
        self.path = path

    def read(self):
        """Return the whole content of the file at ``self.path`` as a string."""
        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.path) as f:
            return f.read()
Next, define a MapReduce Worker that uses the input data.
class Worker(object):
    """Base class for a MapReduce worker that processes one piece of input.

    Subclasses are expected to override ``map`` and ``reduce``.
    """

    # The original spelled this ``__init_`` (one trailing underscore), so it
    # was never used as the constructor and ``Worker(input_data)`` raised
    # TypeError.
    def __init__(self, input_data):
        """Store the input to process and start with no result.

        Args:
            input_data: Object this worker will read its data from.
        """
        self.input_data = input_data
        self.result = None

    def map(self):
        """Process ``self.input_data``; the base implementation always raises."""
        raise NotImplementedError

    def reduce(self, other):
        """Combine ``other``'s result into this worker; the base always raises.

        Args:
            other: Another worker whose result should be merged in.
        """
        raise NotImplementedError
Next, we inherit from the Worker class and define a worker that counts newline characters.
class LineCountWorker(Worker):
    """Worker that counts newline characters in its input data."""

    def map(self):
        """Read the input and store the number of '\\n' characters in ``result``."""
        self.result = self.input_data.read().count('\n')

    def reduce(self, other):
        """Add ``other.result`` onto this worker's ``result`` in place."""
        self.result += other.result
Consider how to connect the classes you have defined so far. The first approach is to use helper functions to build the objects and wire them together manually.
import os
from threading import Thread
# List the contents of the directory and generate a PathInputData instance for each file it contains
def generate_inputs(data_dir):
    """Yield one PathInputData per entry listed in ``data_dir``.

    Args:
        data_dir: Directory whose entries (as returned by ``os.listdir``)
            are each joined onto ``data_dir`` and wrapped.
    """
    entries = os.listdir(data_dir)
    for entry in entries:
        full_path = os.path.join(data_dir, entry)
        yield PathInputData(full_path)
# Create a LineCountWorker instance for each InputData instance returned by generate_inputs
def create_workers(input_list):
    """Return a list holding one LineCountWorker for each item of ``input_list``.

    Args:
        input_list: Iterable of input-data objects, consumed once in order.
    """
    return [LineCountWorker(item) for item in input_list]
def execute(workers):
    """Run every worker's ``map`` in its own thread, then reduce the results.

    Args:
        workers: Indexable, sliceable sequence of workers exposing ``map``,
            ``reduce`` and ``result``.

    Returns:
        The ``result`` of the first worker after every other worker has been
        passed to its ``reduce``.
    """
    pool = []
    for w in workers:
        pool.append(Thread(target=w.map))

    # Start all threads before joining any, so the map steps can overlap.
    for t in pool:
        t.start()
    for t in pool:
        t.join()

    head = workers[0]
    for other in workers[1:]:
        head.reduce(other)
    return head.result
# Finally, combine them in a single function that runs each step
def mapreduce(data_dir):
    """Build inputs from ``data_dir``, wrap them in workers and run them.

    Args:
        data_dir: Directory passed through to ``generate_inputs``.

    Returns:
        Whatever ``execute`` returns for the created workers.
    """
    return execute(create_workers(generate_inputs(data_dir)))
The problem with this integration method is that if you write other subclasses of InputData or Worker, you will have to rewrite generate_inputs and create_workers, and the mapreduce function as well, to support them. The way to solve this problem is to use @classmethod polymorphism, which applies to the class as a whole rather than to individual constructed objects. The code that applies this approach to the MapReduce classes is as follows.
import os
from threading import Thread
class InputData:
    """Common interface for a source of MapReduce input data.

    Subclasses are expected to override ``read``.
    """

    def read(self):
        """Return the input data; the base implementation always raises."""
        raise NotImplementedError()
class PathInputData(InputData):
    """Input data that is read from a file on disk."""

    def __init__(self, path):
        """Remember the path of the file to read.

        Args:
            path: Filesystem path handed to ``open`` when ``read`` is called.
        """
        super().__init__()
        self.path = path

    def read(self):
        """Return the whole content of the file at ``self.path`` as a string."""
        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.path) as f:
            return f.read()
class Worker(object):
    """Base class for a MapReduce worker that processes one piece of input.

    Subclasses are expected to override ``map`` and ``reduce``.
    """

    # The original spelled this ``__init_`` (one trailing underscore), so it
    # was never used as the constructor and ``Worker(input_data)`` raised
    # TypeError.
    def __init__(self, input_data):
        """Store the input to process and start with no result.

        Args:
            input_data: Object this worker will read its data from.
        """
        self.input_data = input_data
        self.result = None

    def map(self):
        """Process ``self.input_data``; the base implementation always raises."""
        raise NotImplementedError

    def reduce(self, other):
        """Combine ``other``'s result into this worker; the base always raises.

        Args:
            other: Another worker whose result should be merged in.
        """
        raise NotImplementedError
class LineCountWorker(Worker):
    """Worker that counts newline characters in its input data."""

    def map(self):
        """Read the input and store the number of '\\n' characters in ``result``."""
        text = self.input_data.read()
        newline_total = text.count('\n')
        self.result = newline_total

    def reduce(self, other):
        """Add ``other.result`` onto this worker's ``result`` in place."""
        self.result += other.result
def generate_inputs(data_dir):
    """Yield one PathInputData per entry listed in ``data_dir``.

    Args:
        data_dir: Directory whose entries (as returned by ``os.listdir``)
            are each joined onto ``data_dir`` and wrapped.
    """
    entries = os.listdir(data_dir)
    for entry in entries:
        full_path = os.path.join(data_dir, entry)
        yield PathInputData(full_path)
def create_workers(input_list):
    """Return a list holding one LineCountWorker for each item of ``input_list``.

    Args:
        input_list: Iterable of input-data objects, consumed once in order.
    """
    return [LineCountWorker(item) for item in input_list]
def execute(workers):
    """Run every worker's ``map`` in its own thread, then reduce the results.

    Args:
        workers: Indexable, sliceable sequence of workers exposing ``map``,
            ``reduce`` and ``result``.

    Returns:
        The ``result`` of the first worker after every other worker has been
        passed to its ``reduce``.
    """
    pool = []
    for w in workers:
        pool.append(Thread(target=w.map))

    # Start all threads before joining any, so the map steps can overlap.
    for t in pool:
        t.start()
    for t in pool:
        t.join()

    head = workers[0]
    for other in workers[1:]:
        head.reduce(other)
    return head.result
class GenericInputData(object):
    """Abstract input data that also knows how to enumerate its own instances.

    Subclasses are expected to override both ``read`` and the class method
    ``generate_inputs``.
    """

    # The original spelled this ``rad``; the subclass in this file overrides
    # ``read`` and LineCountWorker.map calls ``read``, so the abstract method
    # is declared under that name.
    def read(self):
        """Return the input data; the base implementation always raises."""
        raise NotImplementedError

    @classmethod
    def generate_inputs(cls, config):
        """Produce the input objects for ``config``; the base always raises.

        Args:
            config: Configuration interpreted by the concrete subclass.
        """
        raise NotImplementedError
class PathInputData(GenericInputData):
    """Input data read from a file, with a factory that scans a directory."""

    def __init__(self, path):
        """Remember the path of the file to read.

        Args:
            path: Filesystem path handed to ``open`` when ``read`` is called.
        """
        super().__init__()
        self.path = path

    def read(self):
        """Return the whole content of the file at ``self.path`` as a string."""
        with open(self.path) as handle:
            contents = handle.read()
        return contents

    @classmethod
    def generate_inputs(cls, config):
        """Yield one instance of ``cls`` per entry of ``config['data_dir']``.

        Args:
            config: Mapping with a ``'data_dir'`` key naming the directory
                to list.
        """
        directory = config['data_dir']
        for entry in os.listdir(directory):
            full_path = os.path.join(directory, entry)
            yield cls(full_path)
class GenericWorker:
    """Abstract MapReduce worker with a class-level factory for its instances.

    Subclasses are expected to override ``map`` and ``reduce``.
    """

    def __init__(self, input_data):
        """Store the input to process and start with no result.

        Args:
            input_data: Object this worker will read its data from.
        """
        self.result = None
        self.input_data = input_data

    def map(self):
        """Process ``self.input_data``; the base implementation always raises."""
        raise NotImplementedError()

    def reduce(self, other):
        """Combine ``other``'s result into this worker; the base always raises."""
        raise NotImplementedError()

    @classmethod
    def create_workers(cls, input_class, config):
        """Return a list with one ``cls`` instance per generated input.

        Args:
            input_class: Class providing ``generate_inputs(config)``.
            config: Configuration forwarded unchanged to ``generate_inputs``.
        """
        return [cls(item) for item in input_class.generate_inputs(config)]
class LineCountWorker(GenericWorker):
    """Worker that counts newline characters in its input data."""

    def map(self):
        """Read the input and store the number of '\\n' characters in ``result``."""
        self.result = self.input_data.read().count('\n')

    def reduce(self, other):
        """Add ``other.result`` onto this worker's ``result`` in place."""
        self.result += other.result
def mapreduce(worker_class, input_class, config):
    """Create workers through ``worker_class`` and run them with ``execute``.

    Args:
        worker_class: Class providing ``create_workers(input_class, config)``.
        input_class: Input-data class forwarded to ``create_workers``.
        config: Configuration forwarded to ``create_workers``.

    Returns:
        Whatever ``execute`` returns for the created workers.
    """
    return execute(worker_class.create_workers(input_class, config))
This code produces the same result as the previous implementation. Writing it this way eliminates the need to rewrite the glue code when you add or swap GenericInputData or GenericWorker subclasses.
Recommended Posts