from random import sample, random, choice
from math import log
from utils import run_time
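# `run_time` is a timing decorator from this repo's utils module. If utils is
# not on the path, a minimal stand-in could look like the sketch below (an
# assumption, not the repo's actual implementation):
#
#     from functools import wraps
#     from time import time
#
#     def run_time(fn):
#         @wraps(fn)
#         def wrapper(*args, **kwargs):
#             start = time()
#             ret = fn(*args, **kwargs)
#             print("Total run time is %.2f s" % (time() - start))
#             return ret
#         return wrapper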
class Node(object):
    def __init__(self, size):
        """Node class to build tree leaves.

        Keyword Arguments:
            size {int} -- Node size (default: {None})
        """
        # Node size
        self.size = size
        # Feature to split
        self.split_feature = None
        # Split point
        self.split_point = None
        # Left child node
        self.left = None
        # Right child node
        self.right = None

class IsolationTree(object):
    def __init__(self, X, n_samples, max_depth):
        """Isolation Tree class.

        Arguments:
            X {list} -- 2d list with int or float
            n_samples {int} -- Subsample size
            max_depth {int} -- Maximum depth of the tree
        """
        self.height = 0
        # In case n_samples is greater than n
        n = len(X)
        if n_samples > n:
            n_samples = n
        # Root node
        self.root = Node(n_samples)
        # Build isolation tree
        self._build_tree(X, n_samples, max_depth)
    def _get_split(self, X, idx, split_feature):
        """Randomly choose a split point.

        Arguments:
            X {list} -- 2d list object with int or float
            idx {list} -- 1d list object with int
            split_feature {int} -- Column index of X

        Returns:
            float -- Split point, or None if the column cannot be split
        """
        # The split point should be greater than min(X[feature])
        unique = set(map(lambda i: X[i][split_feature], idx))
        # Cannot split
        if len(unique) == 1:
            return None
        unique.remove(min(unique))
        x_min, x_max = min(unique), max(unique)
        # Caution: random() -> x in the interval [0, 1)
        return random() * (x_max - x_min) + x_min
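    # Illustrative note (added, not in the original source): if the chosen
    # column's values are {1, 3, 7}, the minimum 1 is removed first, so the
    # split point is drawn uniformly from [3, 7). The value 1 always falls
    # left and 7 always falls right, so both children are non-empty.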
    def _build_tree(self, X, n_samples, max_depth):
        """The current node's data space is divided into 2 sub spaces: data
        less than the split point in the specified dimension goes to the
        current node's left child, data greater than or equal to the split
        point goes to the current node's right child. Recursively construct
        new child nodes until the data cannot be split in the child nodes or
        the child nodes have reached max_depth.

        Arguments:
            X {list} -- 2d list object with int or float
            n_samples {int} -- Subsample size
            max_depth {int} -- Maximum depth of the tree
        """
        # Dataset shape
        m = len(X[0])
        n = len(X)
        # Randomly select sample points into the root node of the tree
        idx = sample(range(n), n_samples)
        # Queue of [depth, node, idx]
        que = [[0, self.root, idx]]
        # BFS
        while que and que[0][0] <= max_depth:
            depth, nd, idx = que.pop(0)
            # Stop splitting if X cannot be split
            nd.split_feature = choice(range(m))
            nd.split_point = self._get_split(X, idx, nd.split_feature)
            if nd.split_point is None:
                continue
            # Split
            idx_left = []
            idx_right = []
            while idx:
                i = idx.pop()
                xi = X[i][nd.split_feature]
                if xi < nd.split_point:
                    idx_left.append(i)
                else:
                    idx_right.append(i)
            # Generate left and right children
            nd.left = Node(len(idx_left))
            nd.right = Node(len(idx_right))
            # Put the left and right children into the que with depth plus one
            que.append([depth + 1, nd.left, idx_left])
            que.append([depth + 1, nd.right, idx_right])
        # Update the height of the IsolationTree
        self.height = depth
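    # Note (added, from the Isolation Forest paper): anomalies are isolated in
    # few splits, so trees only need to grow to about ceil(log2(n_samples));
    # points that land in a leaf with more than one sample are "normal enough",
    # and their remaining path length is estimated by _get_adjustment below.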
    def _predict(self, xi):
        """Auxiliary function of predict.

        Arguments:
            xi {list} -- 1d list with int or float

        Returns:
            tuple -- The depth of the node which xi belongs to, and the size
            of that node
        """
        # Search xi from the IsolationTree until xi is at a leaf node
        nd = self.root
        depth = 0
        while nd.left and nd.right:
            if xi[nd.split_feature] < nd.split_point:
                nd = nd.left
            else:
                nd = nd.right
            depth += 1
        return depth, nd.size

class IsolationForest(object):
    def __init__(self):
        """IsolationForest, randomly build some IsolationTree instances, and
        average the scores of each IsolationTree.

        Attributes:
            trees {list} -- 1d list with IsolationTree objects
            adjustment {float}
        """
        self.trees = None
        self.adjustment = None  # TBC
    def fit(self, X, n_samples=256, max_depth=10, n_trees=100):
        """Build IsolationForest with dataset X.

        Arguments:
            X {list} -- 2d list with int or float

        Keyword Arguments:
            n_samples {int} -- According to the paper, set number of samples
            to 256 (default: {256})
            max_depth {int} -- Tree height limit (default: {10})
            n_trees {int} -- According to the paper, set number of trees to
            100 (default: {100})
        """
        self.adjustment = self._get_adjustment(n_samples)
        self.trees = [IsolationTree(X, n_samples, max_depth)
                      for _ in range(n_trees)]
    def _get_adjustment(self, node_size):
        """Calculate adjustment according to the formula in the paper:
        c(n) = 2 * H(n - 1) - 2 * (n - 1) / n, where the harmonic number H(i)
        is approximated by ln(i) + 0.5772156649 (Euler's constant).

        Arguments:
            node_size {int} -- Number of samples in the leaf node

        Returns:
            float -- Adjustment
        """
        if node_size > 2:
            i = node_size - 1
            ret = 2 * (log(i) + 0.5772156649) - 2 * i / node_size
        elif node_size == 2:
            ret = 1
        else:
            ret = 0
        return ret
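    # Worked example (added note): for node_size = 256,
    # c(256) = 2 * (ln(255) + 0.5772156649) - 2 * 255 / 256 ≈ 10.24,
    # i.e. the average search path length of a BST built on 256 points.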
    def _predict(self, xi):
        """Auxiliary function of predict.

        Arguments:
            xi {list} -- 1d list object with int or float

        Returns:
            float -- Anomaly score of xi
        """
        # Calculate the average score of xi at each tree
        score = 0
        n_trees = len(self.trees)
        for tree in self.trees:
            depth, node_size = tree._predict(xi)
            score += (depth + self._get_adjustment(node_size))
        score = score / n_trees
        # Scale
        return 2 ** -(score / self.adjustment)
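    # Note (added, from the Isolation Forest paper): this is the anomaly score
    # s(x, n) = 2 ** (-E(h(x)) / c(n)). Scores close to 1 indicate anomalies,
    # scores well below 0.5 indicate normal points, and scores around 0.5 for
    # all samples mean the data has no distinct anomaly.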
    def predict(self, X):
        """Get the prediction of y.

        Arguments:
            X {list} -- 2d list object with int or float

        Returns:
            list -- 1d list object with float
        """
        return [self._predict(xi) for xi in X]

@run_time
def main():
    print("Comparing average score of X and the outlier's score...")
    # Generate a dataset randomly
    n = 100
    X = [[random() for _ in range(5)] for _ in range(n)]
    # Add an outlier
    X.append([10] * 5)
    # Train model
    clf = IsolationForest()
    clf.fit(X, n_samples=500)
    # Show result
    print("Average score is %.2f" % (sum(clf.predict(X)) / len(X)))
    print("Outlier's score is %.2f" % clf._predict(X[-1]))
if __name__ == "__main__":
main()