Python+Neo4j+Django+Echarts知识图谱可视化

语言: CN / TW / HK

highlight: a11y-dark theme: smartblue


0 版本

python == 3.8.0 Django == 4.1.2 py2neo == 2021.2.3 pyecharts == 1.9.1

1 目的及功能

目的: 由于neo4j自带的可视化界面展示效果有限,重建可视化界面
功能: 实现与neo4j数据库实时连接,可动态更新,动态显示

2 Neo4j

2.1 Neo4j安装

在Neo4j官网下载社区版

image.png

下载对应版本压缩包,解压即可。我使用的是Ubuntu系统,解压完成后进入bin文件夹下。

image.png

在当前界面启动终端,输入启动命令: `./neo4j console`。启动成功后,可以在 localhost:7474 打开neo4j。

image.png

默认用户名和密码均为 neo4j,首次登录后会提示你修改密码。如果想更换用户名和密码,可以在system用户下操作,这里不再赘述。 image.png

2.2 使用py2neo导入数据

本文使用json格式对数据存储,参考[1][2]

image.png

导入到neo4j

``` python
#!/usr/bin/env python3
# coding: utf-8
# File: MilitaryGraph.py
# Author: zpeng
# Date: 22-10-20

import os
import json
import codecs
from py2neo import Graph, Node

class MilitaryGraph:
    """Import military-system records from a JSON-lines file into Neo4j.

    The data file ``military.json`` is expected next to this script; every
    non-blank line is one JSON object describing a single entity. The class
    can create the graph nodes and relationships and can also dump the
    extracted names/relations to text or JSON files.
    """

    def __init__(self):
        # Resolve the data file relative to this script rather than the
        # current working directory.
        c_dir = os.path.dirname(os.path.abspath(__file__))
        self.datapath = os.path.join(c_dir, 'military.json')
        self.g = Graph('http://localhost:7474/', auth=("neo4j", "123456"))

    def read_nodes(self):
        """Parse the data file into node-name sets and relationship lists.

        Returns:
            A 14-tuple, in this order:
            ``militarysys_infos`` (list of per-entity dicts),
            ``military_sys``, ``locations``, ``performances``, ``categorys``,
            ``exps``, ``deploys`` (sets of names), then the edge lists
            ``rels_per``, ``rels_loca``, ``rels_deploy``, ``rels_exp``,
            ``rels_subcategory``, ``rels_categorys``, ``rels_rootcategory``
            (each a list of ``[start_name, end_name]`` pairs).

        Raises:
            KeyError: if a record has no ``name`` field.
            json.JSONDecodeError: if a non-blank line is not valid JSON.
        """
        # Node names by kind; duplicates are removed when returning.
        military_sys = []   # entity names
        categorys = []      # category names (all levels)
        performances = []   # performance indicators
        locations = []      # locations
        deploys = []        # deployment modes
        exps = []           # cases / examples

        militarysys_infos = []  # one attribute dict per entity

        # Relationship edge lists.
        rels_per = []           # entity -> performance
        rels_subcategory = []   # entity -> its most specific category
        rels_categorys = []     # small category -> big category
        rels_rootcategory = []  # big category -> root category
        rels_loca = []          # entity -> location
        rels_deploy = []        # entity -> deployment mode
        rels_exp = []           # entity -> case

        # List-valued fields that all follow the same pattern: collect the
        # values and add one [entity, value] edge per value.
        list_fields = (
            ('performance', performances, rels_per),
            ('location', locations, rels_loca),
            ('deploy', deploys, rels_deploy),
            ('exp', exps, rels_exp),
        )

        cont = 0
        # Explicit UTF-8 and a context manager: the data is Chinese text and
        # the handle must not depend on the locale or on garbage collection.
        with open(self.datapath, encoding='utf-8') as data_file:
            for data in data_file:
                if not data.strip():
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                cont += 1
                print(cont)
                data_json = json.loads(data)
                militarysys = data_json['name']
                military_sys.append(militarysys)
                militarysys_dict = {
                    'name': militarysys,
                    'desc': data_json.get('desc', ''),
                    'category': '',
                    'performance': '',
                    'location': '',
                    'deploy': '',
                    'exp': '',
                }

                for field, values, rels in list_fields:
                    if field in data_json:
                        values.extend(data_json[field])
                        rels.extend([militarysys, value] for value in data_json[field])

                if 'category' in data_json:
                    cure_categorys = data_json['category']
                    if len(cure_categorys) == 2:
                        # [root, big]: the entity hangs directly off "big".
                        root, big = cure_categorys
                        rels_rootcategory.append([big, root])
                        rels_subcategory.append([militarysys, big])
                    elif len(cure_categorys) == 3:
                        # [root, big, small]: the entity hangs off "small".
                        root, big, small = cure_categorys
                        rels_rootcategory.append([big, root])
                        rels_categorys.append([small, big])
                        rels_subcategory.append([militarysys, small])

                    militarysys_dict['category'] = cure_categorys
                    categorys.extend(cure_categorys)

                militarysys_infos.append(militarysys_dict)

        return (militarysys_infos, set(military_sys), set(locations), set(performances),
                set(categorys), set(exps), set(deploys),
                rels_per, rels_loca, rels_deploy, rels_exp,
                rels_subcategory, rels_categorys, rels_rootcategory)

    def create_node(self, label, nodes):
        """Create one node labelled ``label`` for every name in ``nodes``.

        Progress is printed as ``<created so far> <total>``.
        """
        total = len(nodes)
        for count, node_name in enumerate(nodes, start=1):
            self.g.create(Node(label, name=node_name))
            print(count, total)

    def create_militarysys_nodes(self, militarysys_infos):
        """Create one ``MSYS`` node per entity dict produced by ``read_nodes``."""
        for count, militarysys_dict in enumerate(militarysys_infos, start=1):
            node = Node(
                "MSYS",
                name=militarysys_dict['name'],
                desc=militarysys_dict['desc'],
                performance=militarysys_dict['performance'],
                category=militarysys_dict['category'],
                location=militarysys_dict['location'],
                deploy=militarysys_dict['deploy'],
                exp=militarysys_dict['exp'],
            )
            self.g.create(node)
            print(count)

    def create_graphnodes(self):
        """Read the data file and create every node kind in the graph."""
        militarysys_infos, military_sys, locations, performances, categorys, exps, deploys, \
            rels_per, rels_loca, rels_deploy, rels_exp, rels_subcategory, rels_categorys, rels_rootcategory = self.read_nodes()
        self.create_militarysys_nodes(militarysys_infos)
        self.create_node('LOCA', locations)
        self.create_node('PERF', performances)
        self.create_node('CATE', categorys)
        print(len(categorys))
        self.create_node('EXPS', exps)
        self.create_node('DEPLOY', deploys)

    def create_graphrels(self):
        """Read the data file and create every relationship kind in the graph."""
        militarysys_infos, military_sys, locations, performances, categorys, exps, deploys, \
            rels_per, rels_loca, rels_deploy, rels_exp, rels_subcategory, rels_categorys, rels_rootcategory = self.read_nodes()
        self.create_relationship('MSYS', 'PERF', rels_per, 'performances', '性能指标')
        self.create_relationship('MSYS', 'LOCA', rels_loca, 'locations', '位置')
        self.create_relationship('MSYS', 'DEPLOY', rels_deploy, 'deploys', '部署')
        self.create_relationship('MSYS', 'CATE', rels_subcategory, 'belongsto', '所属类别')
        self.create_relationship('CATE', 'CATE', rels_categorys, 'belongsto', '属于')
        self.create_relationship('CATE', 'CATE', rels_rootcategory, 'belongsto', '属于')
        self.create_relationship('MSYS', 'EXPS', rels_exp, 'examples', '案例')

    def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        """Create ``(start)-[rel_type {name: rel_name}]->(end)`` for each unique edge.

        Args:
            start_node: label of the start nodes.
            end_node: label of the end nodes.
            edges: iterable of ``[start_name, end_name]`` pairs; duplicates
                are created only once.
            rel_type: relationship type used in the Cypher pattern.
            rel_name: value stored in the relationship's ``name`` property.

        A failing query is printed and skipped so that one bad edge does not
        abort the whole import.
        """
        # De-duplicate on the (start, end) pair itself, keeping first-seen
        # order; this also works when a name contains the old '###' separator.
        unique_edges = list(dict.fromkeys((edge[0], edge[1]) for edge in edges))
        total = len(unique_edges)
        # Labels and relationship types cannot be Cypher parameters, so they
        # are interpolated (the caller in this class passes fixed constants).
        # The node names and the relationship name come from data and are
        # passed as parameters so quotes in them cannot break the query.
        query = (
            "MATCH (p:%s), (q:%s) WHERE p.name = $p AND q.name = $q "
            "CREATE (p)-[rel:%s {name: $rel_name}]->(q)" % (start_node, end_node, rel_type)
        )
        count = 0
        for p, q in unique_edges:
            try:
                self.g.run(query, {'p': p, 'q': q, 'rel_name': rel_name})
                count += 1
                print(rel_type, count, total)
            except Exception as e:
                # Deliberately best-effort: report and continue.
                print(e)

    def export_txtdata(self):
        """Write each node-name set to its own text file, one name per line.

        The files are written to the current working directory.
        """
        militarysys_infos, military_sys, locations, performances, categorys, exps, deploys, \
            rels_per, rels_loca, rels_deploy, rels_exp, rels_subcategory, rels_categorys, rels_rootcategory = self.read_nodes()
        exports = (
            ('militarysys.txt', military_sys),
            ('locations.txt', locations),
            ('performances.txt', performances),
            ('categorys.txt', categorys),
            ('exps.txt', exps),
            ('deploys.txt', deploys),
        )
        for filename, names in exports:
            # UTF-8 explicitly: the names are Chinese text.
            with open(filename, 'w', encoding='utf-8') as f:
                f.write('\n'.join(names))

    def export_data(self, data, path):
        """Dump ``data`` to ``path`` as indented UTF-8 JSON.

        A list of strings is de-duplicated, stripped of leading/trailing
        dots and sorted before writing; any other list (for example a list
        of edge pairs) is written unchanged. An empty list is written as
        ``[]``.
        """
        if data and isinstance(data[0], str):
            data = sorted([d.strip("...") for d in set(data)])
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4, ensure_ascii=False)

    def export_entitys_relations(self):
        """Export every node-name set and edge list to ``./graph_data/*.json``."""
        militarysys_infos, military_sys, locations, performances, categorys, exps, deploys, \
            rels_per, rels_loca, rels_deploy, rels_exp, rels_subcategory, rels_categorys, rels_rootcategory = self.read_nodes()
        # Make sure the output directory exists before the first write.
        os.makedirs('./graph_data', exist_ok=True)
        # Entities and attributes.
        self.export_data(list(military_sys), './graph_data/military_sys.json')
        self.export_data(list(locations), './graph_data/locations.json')
        self.export_data(list(performances), './graph_data/performances.json')
        self.export_data(list(categorys), './graph_data/categorys.json')
        self.export_data(list(exps), './graph_data/exps.json')
        self.export_data(list(deploys), './graph_data/deploys.json')
        # Relationships.
        self.export_data(rels_per, './graph_data/rels_per.json')
        self.export_data(rels_loca, './graph_data/rels_loca.json')
        self.export_data(rels_deploy, './graph_data/rels_deploy.json')
        self.export_data(rels_exp, './graph_data/rels_exp.json')
        self.export_data(rels_subcategory, './graph_data/rels_subcategory.json')
        self.export_data(rels_categorys, './graph_data/rels_categorys.json')
        self.export_data(rels_rootcategory, './graph_data/rels_rootcategory.json')

# Script entry point: import the nodes first, then the relationships that
# connect them.
if __name__ == '__main__':
    handler = MilitaryGraph()
    print("step1:导入图谱节点中")
    handler.create_graphnodes()
    print("step2:导入图谱边中")
    handler.create_graphrels()
    # print("step3:导出数据")
    # handler.export_entitys_relations()
```

导入后在neo4j数据库查看

image.png 包含136个节点,150组关系。 输入查询语句 `MATCH (n) RETURN n`,输出所有节点和关系。

3 前后端控制

使用Django进行后端控制,Echarts进行前端显示。参考[3]项目结构

image.png

3.1 Django后端

后端使用Django控制,对view进行改写,增加分类。view.py文件内容:

```python
import json
from py2neo import *
from django.shortcuts import render

graph = Graph('http://localhost:7474/', auth=('zp', '123456')) # 连接数据库

def search_all():
    """Return every node and relationship of the graph as a JSON string.

    Returns:
        A JSON document ``{"data": [...], "links": [...]}`` where each
        ``data`` item is ``{"name", "symbolSize", "category"}`` and each
        ``links`` item is ``{"source", "target", "name"}``. Links refer to
        nodes by name, so node names are expected to be unique.
    """
    data = []   # node entries
    links = []  # relationship entries

    for n in graph.nodes:
        # The node is round-tripped through JSON before its name is read,
        # exactly as in the original listing.
        nodesStr = json.dumps(graph.nodes[n], ensure_ascii=False)
        node_name = json.loads(nodesStr)['name']
        data.append({
            'name': node_name,
            'symbolSize': 50,
            'category': '对象',
        })

    rps = graph.relationships
    for r in rps:
        # Look the relationship up once instead of once per field.
        rel = rps[r]
        links.append({
            'source': str(rel.start_node['name']),
            'target': str(rel.end_node['name']),
            # The relationship's class name is used as the link label.
            'name': str(type(rel).__name__),
        })

    neo4j_data = {
        'data': data,
        'links': links
    }
    return json.dumps(neo4j_data)

def search_all_category():
    """Return all nodes, tagged with their label, plus all relationships as JSON.

    Nodes are fetched label by label (``DEPLOY``, ``CATE``, ``EXPS``,
    ``LOCA``, ``MSYS``, ``PERF``) and the label is stored as the entry's
    ``category``.

    Returns:
        A JSON document ``{"data": [...], "links": [...]}`` where each
        ``data`` item is ``{"name", "symbolSize", "category"}`` and each
        ``links`` item is ``{"source", "target", "name"}``.
    """
    data = []   # node entries
    links = []  # relationship entries

    # One pass per label replaces six copy-pasted loops; the order matches
    # the original listing. The labels are fixed constants, so formatting
    # them into the query text is safe.
    for label in ('DEPLOY', 'CATE', 'EXPS', 'LOCA', 'MSYS', 'PERF'):
        for record in graph.run('MATCH (n:%s) RETURN n' % label).data():
            # The record is round-tripped through JSON before the name is
            # read, exactly as in the original listing.
            node = json.loads(json.dumps(record, ensure_ascii=False))
            data.append({
                'name': node['n']['name'],
                'symbolSize': 50,
                'category': label,
            })

    rps = graph.relationships
    for r in rps:
        # Look the relationship up once instead of once per field.
        rel = rps[r]
        links.append({
            'source': str(rel.start_node['name']),
            'target': str(rel.end_node['name']),
            # The relationship's class name is used as the link label.
            'name': str(type(rel).__name__),
        })

    neo4j_data = {
        'data': data,
        'links': links
    }
    return json.dumps(neo4j_data)

def search_one(value):
    """Look up the node named ``value`` together with its direct neighbours.

    Args:
        value: node name to search for (comes from the search form).

    Returns:
        A JSON string ``{"data": [...], "links": [...]}`` describing the
        node, its neighbours one step away (either direction) and the
        relationships between them, or the integer ``0`` when no node with
        that name exists.

    NOTE(review): the original listing matched only nodes labelled
    ``person``, a label that none of the other listings here create or
    query, so the search could never succeed against that data. The label
    filter is dropped here — confirm this is the intended scope.
    """
    # The name is passed as a query parameter rather than concatenated into
    # the Cypher text, so quotes in user input cannot alter the query.
    params = {'name': value}

    if not graph.run('MATCH (n {name: $name}) RETURN n', params).data():
        # Caller compares against 0 to detect "not found".
        return 0

    data = [{
        'name': value,
        'symbolSize': 50,
        'category': '对象'
    }]
    links = []

    # Neighbours and relationships one step away, ignoring direction.
    nodes = graph.run('MATCH (n {name: $name})--(m) RETURN m', params).data()
    reps = graph.run('MATCH (n {name: $name})-[rel]-(m) RETURN rel', params).data()

    # A neighbour reached through several relationships must be listed only
    # once; repeated names are skipped.
    seen = {value}
    for n in nodes:
        node = json.loads(json.dumps(n, ensure_ascii=False))
        name = str(node['m']['name'])
        if name in seen:
            continue
        seen.add(name)
        data.append({
            'name': name,
            'symbolSize': 50,
            'category': '对象'
        })

    for r in reps:
        rel = r['rel']
        links.append({
            'source': str(rel.start_node['name']),
            'target': str(rel.end_node['name']),
            # The relationship's class name is used as the link label.
            'name': str(type(rel).__name__),
        })

    search_neo4j_data = {
        'data': data,
        'links': links
    }
    return json.dumps(search_neo4j_data)

def index(request):
    """Render ``index.html`` with the full graph and, on POST, a search result.

    For a POST request the form field ``node`` is looked up with
    ``search_one``. When it is found, the result is passed to the template
    as ``search_neo4j_data``; otherwise ``ctx['title']`` carries a
    "not found" message. The full categorised graph is always passed as
    ``neo4j_data``.
    """
    ctx = {}

    # Plain page load: only the full graph is needed.
    if request.method != 'POST':
        neo4j_data = search_all_category()
        return render(request, 'index.html', {'neo4j_data': neo4j_data, 'ctx': ctx})

    # Search submitted from the front end.
    node_name = request.POST.get('node')
    search_neo4j_data = search_one(node_name)
    neo4j_data = search_all_category()

    if search_neo4j_data == 0:
        # search_one signals "no such node" with 0.
        ctx = {'title': '数据库中暂未添加该实体'}
        return render(request, 'index.html', {'neo4j_data': neo4j_data, 'ctx': ctx})

    return render(
        request,
        'index.html',
        {'neo4j_data': neo4j_data, 'search_neo4j_data': search_neo4j_data, 'ctx': ctx},
    )

# Manual check: dump the categorised graph JSON when run as a script.
if __name__ == '__main__':
    neo4j_data = search_all_category()
    print(neo4j_data)

```

3.2 Echarts前端

index.html文件内容 ``` {% load static %}

Title

```

4 项目启动

进入到主目录文件夹下,运行manage.py启动项目,输入命令行: `python manage.py runserver`

manage.py中的内容:

```python
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys

def main():
    """Run administrative tasks.

    Sets ``DJANGO_SETTINGS_MODULE`` to ``neo4jconnect_test.settings`` unless
    it is already set, then hands ``sys.argv`` to Django's command-line
    runner.

    Raises:
        ImportError: if Django cannot be imported.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'neo4jconnect_test.settings')
    try:
        # Imported here so a missing Django yields the helpful message below.
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()

```

默认启动地址和端口为:`http://127.0.0.1:8000/`。若希望指定ip地址和端口,可按格式: `python manage.py runserver 0.0.0.0:9008`,其中冒号前是ip地址,冒号后是端口号(runserver 不接受 `--host`/`--port` 参数)。

启动成功的命令输出

image.png

浏览器中输入地址和端口号,显示效果

image.png

遇到问题

echarts问题

1.加载echarts.min.js需要确定好路径
2.初始化要定义好dom,即,div中的id定义要和getElementById方法中初始化名称一致。否则会报错误 t is null
```

...

var dom = document.getElementById("chart-panel");
var myChart1 = echarts.init(dom);
```

3.导入data和link时,要确保数据中没有重复字段,否则会造成节点和连线都不显示
4.保证link中的target和source都是字符串类型

数据问题

1.导入neo4j中的数据,要避免重复字段。错误示例:实体名称和类名称一致

参考

[1] https://github.com/liuhuanyong/QASystemOnMedicalKG
[2] https://github.com/wangle1218/KBQA-for-Diagnosis
[3] https://github.com/Sjyzheishuai/Neo4j-visualization
[4] https://blog.csdn.net/Fimooo/article/details/103069928
[5] https://blog.csdn.net/weixin_44747173/article/details/124835406
[6] https://blog.csdn.net/zjw120/article/details/124194577
[7] https://github.com/zhangxiang0316/echartsDemo
[8] https://github.com/pyecharts/pyecharts-gallery
[9] https://github.com/ecomfe/awesome-echarts