Source code for easygraph.readwrite.ucinet

Read and write graphs in UCINET DL format.
This implementation currently supports only the 'fullmatrix' data format.
The UCINET DL format is the most common file format used by the UCINET package.
Basic example:
DL N = 5
0 1 1 1 1
1 0 1 0 0
1 1 0 0 1
1 0 0 0 0
1 0 1 0 0
    See the UCINET User Guide for full format information.

import re
import shlex

import easygraph as eg
import numpy as np

from easygraph.utils import open_file

# Names exported by ``from easygraph.readwrite.ucinet import *``.
# The ``get_param`` helper defined in this module is not listed here.
__all__ = ["generate_ucinet", "read_ucinet", "parse_ucinet", "write_ucinet"]

def generate_ucinet(G):
    """Generate lines in UCINET graph format.

    Parameters
    ----------
    G : graph
        An EasyGraph graph.

    Yields
    ------
    str
        In order: the ``dl n=<N> format=fullmatrix`` header, an optional
        ``labels:`` block (emitted only when the first node, in sorted
        order, cannot be converted with ``int``), the ``data:`` marker and
        finally the adjacency matrix as one multi-line string with one
        whitespace-separated row per line.

    Notes
    -----
    The default format 'fullmatrix' is used (for UCINET DL format).

    The matrix rows are formatted explicitly instead of relying on ``str()``
    of a NumPy array: by default NumPy abbreviates the printed form of
    arrays holding more than 1000 elements with ``...``, which would
    silently corrupt the output for graphs with 32 or more nodes.

    References
    ----------
    See the UCINET User Guide for full format information.
    """
    n = G.number_of_nodes()
    nodes = sorted(G.nodes)
    yield "dl n=%i format=fullmatrix" % n

    # Labels are only written when the nodes are not integer-like.  The
    # probe is skipped for a graph without nodes, where ``nodes[0]`` would
    # raise IndexError.
    if nodes:
        try:
            int(nodes[0])
        except ValueError:
            yield "labels:\n" + "".join(label + " " for label in nodes)

    yield "data:"

    matrix = np.asarray(eg.to_numpy_array(G, nodelist=nodes, dtype=int))
    yield "\n".join(" ".join(str(value) for value in row) for row in matrix.tolist())
@open_file(0, mode="rb")
def read_ucinet(path, encoding="UTF-8"):
    """Read graph in UCINET format from path.

    Parameters
    ----------
    path : file or string
        File or filename to read.
        Filenames ending in .gz or .bz2 will be uncompressed.
    encoding : string, optional
        Codec used to decode every line read from ``path``
        (default ``"UTF-8"``).

    Returns
    -------
    G : EasyGraph graph
        Whatever :func:`parse_ucinet` returns for the decoded lines.

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> eg.write_ucinet(G, "test.dl")
    >>> G = eg.read_ucinet("test.dl")

    To create a Graph instead of a MultiGraph use

    >>> G1 = eg.Graph(G)

    See Also
    --------
    parse_ucinet()

    References
    ----------
    See the UCINET User Guide for full format information.
    """

    def _decode(raw_line):
        # The file is opened in binary mode, so each line arrives as bytes.
        return raw_line.decode(encoding)

    return parse_ucinet(map(_decode, path))
@open_file(1, mode="wb")
def write_ucinet(G, path, encoding="UTF-8"):
    """Write graph in UCINET format to path.

    Parameters
    ----------
    G : graph
        An EasyGraph graph.
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string, optional
        Codec used to encode every generated line before it is written
        (default ``"UTF-8"``).

    Examples
    --------
    >>> G = eg.path_graph(4)
    >>> eg.write_ucinet(G, "test.dl")

    See Also
    --------
    generate_ucinet()

    References
    ----------
    See the UCINET User Guide for full format information.
    """
    for text in generate_ucinet(G):
        # Each generated chunk is terminated with a newline and written as
        # bytes, because the target is opened in binary mode.
        path.write((text + "\n").encode(encoding))
def parse_ucinet(lines):
    """Parse UCINET format graph from string or iterable.

    Currently only the 'fullmatrix', 'nodelist1', 'nodelist1b' and
    'edgelist1' formats are read. The other recognised format names are
    accepted in the header, but no edges are read for them.

    Parameters
    ----------
    lines : string or iterable
        Data in UCINET format. An iterable may yield ``str`` or ``bytes``
        items; ``bytes`` items are decoded as UTF-8. The whole input is
        lower-cased before parsing.

    Returns
    -------
    G : EasyGraph graph
        A graph built with ``eg.MultiDiGraph``.

    Raises
    ------
    IndexError
        If the input contains no ``data:`` section.
    Exception
        Raised by :func:`get_param` when a header parameter has no
        recognisable value.

    See Also
    --------
    read_ucinet()

    References
    ----------
    See the UCINET User Guide for full format information.
    """
    from numpy import genfromtxt
    from numpy import isnan
    from numpy import reshape

    G = eg.MultiDiGraph()

    if not isinstance(lines, str):
        lines = "".join(
            line.decode("utf-8") if isinstance(line, bytes) else line
            for line in lines
        )

    # Tokenise the lower-cased text; ',' and '=' separate tokens as well, so
    # "n=5" is seen as the two tokens "n" and "5".
    lexer = shlex.shlex(lines.lower())
    lexer.whitespace += ",="
    lexer.whitespace_split = True

    number_of_nodes = 0
    nr = 0  # number of rows (rectangular matrix)
    nc = 0  # number of columns (rectangular matrix)
    ucinet_format = "fullmatrix"  # Format by default
    labels = {}  # Contains labels of nodes
    row_labels_embedded = False  # Whether labels are embedded in data or not
    cols_labels_embedded = False
    KEYWORDS = ("format", "data:", "labels:")  # TODO remove ':' in keywords
    format_regex = (
        r"^(fullmatrix|upperhalf|lowerhalf|nodelist1|nodelist2|nodelist1b|"
        r"edgelist1|edgelist2|blockmatrix|partition)$"
    )

    # Keyword token that ended a labels section. It has already been taken
    # from the lexer, so it must be dispatched on the next pass instead of
    # being dropped (otherwise e.g. a "format" after the labels is ignored).
    pending = None
    while True:
        if pending is not None:
            token, pending = pending, None
        else:
            try:
                token = next(lexer)
            except StopIteration:
                break

        if token.startswith("n"):
            if token.startswith("nr"):
                nr = int(get_param(r"\d+", token, lexer))
                number_of_nodes = max(nr, nc)
            elif token.startswith("nc"):
                nc = int(get_param(r"\d+", token, lexer))
                number_of_nodes = max(nr, nc)
            elif token.startswith("nm"):
                # Number of matrices: the value is consumed but not used.
                get_param(r"\d+", token, lexer)
            else:
                number_of_nodes = int(get_param(r"\d+", token, lexer))
                nr = number_of_nodes
                nc = number_of_nodes
        elif token.startswith("diagonal"):
            # Presence of the main diagonal: consumed but not used.
            get_param("present|absent", token, lexer)
        elif token.startswith("format"):
            ucinet_format = get_param(format_regex, token, lexer)
        # TODO : row and columns labels
        elif token.startswith("row"):  # Row labels
            pass
        elif token.startswith("column"):  # Columns labels
            pass
        elif token.startswith("labels"):
            index = 0
            for token in lexer:
                if token in KEYWORDS:
                    pending = token
                    break
                if token.startswith("embedded"):
                    row_labels_embedded = True
                    cols_labels_embedded = True
                    break
                # Strip the quotes kept around labels with embedded spaces.
                labels[index] = token.replace('"', "")
                index += 1
        elif token.startswith("data"):
            break

    # Everything after the first "data:" marker is the data section.
    data_lines = lines.lower().split("data:", 1)[1]

    if cols_labels_embedded:
        # The data section starts with the rest of the "data:" line, so the
        # column labels are expected on the following line.
        labels = dict(zip(range(0, nc), data_lines.splitlines()[1].split()))
    # TODO rectangular case : labels can differ from rows to columns

    labels_embedded = row_labels_embedded and cols_labels_embedded

    if ucinet_format == "fullmatrix":
        # In Python3 genfromtxt requires bytes string
        try:
            data_lines = bytes(data_lines, "utf-8")
        except TypeError:
            pass
        # Do not use splitlines() because it is not necessarily written as
        # a square matrix
        data = genfromtxt([data_lines], case_sensitive=False)
        if cols_labels_embedded or row_labels_embedded:
            # Embedded labels are not numeric and come back as NaN.
            data = data[~isnan(data)]
        mat = reshape(data, (max(number_of_nodes, nr), -1))
        G = eg.from_numpy_array(mat, create_using=eg.MultiDiGraph())

    elif ucinet_format in ("nodelist1", "nodelist1b"):
        # Turn every "source n1 n2 ..." row into "source n" edge lines.
        edge_lines = []
        for i, line in enumerate(data_lines.splitlines()):
            row = line.split()
            if not row:
                continue
            if ucinet_format == "nodelist1b" and row[0] == "0":
                continue
            # nodelist1 names the source in the first column; nodelist1b
            # uses the position of the row instead.
            source = row[0] if ucinet_format == "nodelist1" else str(i)
            edge_lines.extend(source + " " + neighbor for neighbor in row[1:])
        G = eg.parse_edgelist(
            edge_lines,
            nodetype=str if labels_embedded else int,
            create_using=eg.MultiDiGraph(),
        )
        if not labels_embedded:
            # Shift the integer node ids down by one.
            G = eg.relabel_nodes(G, {node: node - 1 for node in G.nodes})

    elif ucinet_format == "edgelist1":
        G = eg.parse_edgelist(
            data_lines.splitlines(),
            nodetype=str if labels_embedded else int,
            create_using=eg.MultiDiGraph(),
        )
        if not labels_embedded:
            # Shift the integer node ids down by one.
            G = eg.relabel_nodes(G, {node: node - 1 for node in G.nodes})

    # Relabel nodes
    if labels:
        try:
            if len(list(G.nodes)) < number_of_nodes:
                G.add_nodes_from(labels.values())
            G = eg.relabel_nodes(G, labels)
        except KeyError:
            pass  # Nodes already labelled
    return G
def get_param(regex, token, lines):
    """
    Get a parameter value in UCINET DL file

    The regex is searched in ``token`` first and then in each following
    token taken from ``lines`` until one matches.

    :param regex: string with the regex matching the parameter value
    :param token: token (string) in which we search for the parameter
    :param lines: iterator used to fetch the next tokens; it is advanced
        once for every token that had to be inspected
    :return: the text matched by ``regex`` (a string)
    :raises Exception: if ``lines`` is exhausted before any token matches
    """
    candidate = token
    while True:
        match = re.search(regex, candidate)
        if match is not None:
            return match.group()
        try:
            candidate = next(lines)
        except StopIteration:
            # The message names the token the search started from.
            raise Exception("Parameter %s value not recognized" % token) from None