首页 > 解决方案 > How can I get all possible paths from any tree using a Python function that takes a DataFrame?

问题描述

Given a rule-defined decision tree in table format, I must find all possible paths through it. A path is an ordered list of nodes whose final element is a decision (End_approve or End_deny). All Node IDs can be assumed to be unique, and every node has binary endpoints (one IF_TRUE branch and one IF_FALSE branch).

The output should be a list of paths, where each path is the list of nodes it passed through, ending with the decision:

[Node_ID1, NodeID2, …, Decision].

Example table (DataFrame) input:

Node_ID IF_TRUE IF_FALSE
1 2 3
2 End_approve 3
3 End_deny End_approve

Expected Output:

[[1, 2, approve], [1, 2, 3, deny], [1, 3, deny], [1, 2, 3, approve], [1, 3,
approve]]

'''

from collections import deque
 
class Node:
    """A binary-tree node: a payload plus optional left and right children."""

    def __init__(self, data, left=None, right=None):
        # Store the payload and both (possibly absent) subtrees in one step.
        self.data, self.left, self.right = data, left, right
 
 
def isLeaf(node):
    """Return True when ``node`` has neither a left nor a right child."""
    if node.left is not None:
        return False
    return node.right is None
     
def printRootToLeafPaths(node, path):
    """Recursively print every path from ``node`` down to each leaf below it.

    ``path`` holds the data of the ancestors visited so far and must support
    ``append`` and ``pop`` (a list or deque). It is restored to its incoming
    state before this call returns.
    """
    if node is not None:
        # Extend the running path with the current node.
        path.append(node.data)

        # A leaf completes one root-to-leaf path; print a snapshot of it.
        if isLeaf(node):
            print(list(path))

        # Descend into the left subtree first, then the right.
        for child in (node.left, node.right):
            printRootToLeafPaths(child, path)

        # Backtrack so the caller sees the path it passed in.
        path.pop()
 
 
def printRootToLeafPath(root):
    """Print all root-to-leaf paths of the tree rooted at ``root``."""
    # Start the traversal with an empty deque as the running path.
    printRootToLeafPaths(root, deque())
    
def generate_paths(df_in):
    """Print every root-to-decision path described by the rule table ``df_in``.

    ``df_in`` is a DataFrame with the columns ``Node_ID``, ``IF_TRUE`` and
    ``IF_FALSE``. The first row is taken as the root. A value in ``IF_TRUE``
    or ``IF_FALSE`` that matches some ``Node_ID`` is expanded into that
    node's own branches; any other value (for example a decision label) is a
    leaf. ``IF_TRUE`` becomes the left child and ``IF_FALSE`` the right
    child, so true branches are printed before false branches.

    The tree is expanded to whatever depth the table describes, and the root
    ID is read from the table rather than assumed to be ``1``.

    Returns:
        None. Paths are printed, one list per line, by printRootToLeafPath.

    Raises:
        ValueError: if the rules contain a cycle (a node that can reach
            itself), since the set of paths would then be infinite.
    """
    node_ids = df_in['Node_ID'].tolist()
    if not node_ids:
        # An empty table has no root and therefore no paths to print.
        return

    # Build the ID -> (IF_TRUE, IF_FALSE) lookup once instead of scanning the
    # column for every node. setdefault keeps the first row when an ID is
    # duplicated.
    branches = {}
    rows = zip(node_ids, df_in['IF_TRUE'].tolist(), df_in['IF_FALSE'].tolist())
    for node_id, if_true, if_false in rows:
        branches.setdefault(node_id, (if_true, if_false))

    def _build(value, ancestors):
        # Create the subtree for `value`; `ancestors` holds the IDs on the
        # path from the root down to (but excluding) this node.
        node = Node(value)
        if value not in branches:
            return node  # not a rule node: this is a leaf / decision
        if value in ancestors:
            raise ValueError(
                f"cycle detected in decision rules at node {value!r}"
            )
        if_true, if_false = branches[value]
        below = ancestors | {value}
        # A node reachable by several routes gets a fresh subtree on each
        # route, so every distinct path is printed.
        node.left = _build(if_true, below)
        node.right = _build(if_false, below)
        return node

    root = _build(node_ids[0], frozenset())
    printRootToLeafPath(root)

'''

标签: pythonpathbinary-search-tree

解决方案


推荐阅读