Wednesday, February 8, 2017

Neo4j Insertion/Query Performance Optimization on large (1G+) datasets

A 12-dimensional hypercube in Neo4j - why?


Purpose:

Design for performance by first determining the performance profile of the batched-insertion and paginated-query design patterns.

Raw performance data

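Formula so far (in log form, base 2) is
$\log_2(\text{batch}) = \log_2(\text{data}) - \log_2(\text{threads}) + 1$ (example: data $= 2^{18}$, threads $= 2^{3}$, so $18 - 3 + 1 = 16$ and batch $= 2^{16} = 65536$)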



5820 (RHEL7.3 ulimit) (columns: time in ms, threads, time in sec, dataset size as a power of 2, batch size)

508534,16,508,16,1
235979,16,235,16,2
291876,16,291,16,4
329377,16,329,16,8
354427,16,354,16,16
374336,16,374,16,32
384040,16,384,16,64
393977,16,393,16,128
387177,16,387,16,256
380892,16,380,16,512
383322,16,383,16,1024
363335,16,363,16,2048

3538939,8,3538,18,1
1775530,8,1775,18,2
3867348,8,3867,18,4
4978299,8,4978,18,8
5653734,8,5653,18,16
5999641,8,5999,18,32
6337678,8,6337,18,64
6468651,8,6468,18,128
6572826,8,6572,18,256
6762269,8,6762,18,512
6800382,8,6800,18,1024
7102821,8,7102,18,2048
7618263,8,7618,18,4096
8124558,8,8124,18,8192
6603800,8,6603,18,16384
9529125,8,9529,18,32768
1521072,8,1521,18,65536
4138558,8,4138,18,131072
13791585,8,13791,18,262144
58497594,8,58497,18,524288
1919401,16,1919,18,1
949259,16,949,18,2
3466038,16,3466,18,4
4789950,16,4789,18,8

i7-alu

45:*.xspf=38;5;45:, XDG_VTNR=1, SHLVL=2, HOME=/root}
3538939,8,3538,18,1
1775530,8,1775,18,2
3867348,8,3867,18,4
4978299,8,4978,18,8
5653734,8,5653,18,16
5999641,8,5999,18,32
6337678,8,6337,18,64
6468651,8,6468,18,128
6572826,8,6572,18,256
6762269,8,6762,18,512
6800382,8,6800,18,1024
7102821,8,7102,18,2048
7618263,8,7618,18,4096
8124558,8,8124,18,8192
6603800,8,6603,18,16384
9529125,8,9529,18,32768
1521072,8,1521,18,65536
4138558,8,4138,18,131072
13791585,8,13791,18,262144
58497594,8,58497,18,524288
1919401,16,1919,18,1
949259,16,949,18,2
3466038,16,3466,18,4
4789950,16,4789,18,8

5502976,16,5502,18,16
5887113,16,5887,18,32
6154832,16,6154,18,64
6330652,16,6330,18,128
6542268,16,6542,18,256
6651309,16,6651,18,512
6886699,16,6886,18,1024
6850014,16,6850,18,2048
6682343,16,6682,18,4096
6544843,16,6544,18,8192
5951906,16,5951,18,16384
752136,16,752,18,32768
1549619,16,1549,18,65536
4207831,16,4207,18,131072
15949986,16,15949,18,262144

59302815,16,59302,18,524288
1168163,32,1168,18,1
571298,32,571,18,2
3332109,32,3332,18,4
4764952,32,4764,18,8
5549885,32,5549,18,16
5970120,32,5970,18,32
6295001,32,6295,18,64
6496541,32,6496,18,128
6679320,32,6679,18,256
6824917,32,6824,18,512
6990629,32,6990,18,1024
6860142,32,6860,18,2048
6394366,32,6394,18,4096
5846679,32,5846,18,8192
3427860,32,3427,18,16384
756820,32,756,18,32768
1551643,32,1551,18,65536
4167513,32,4167,18,131072
15790413,32,15790,18,262144
59131487,32,59131,18,524288
^C
[1]+  Done                    nohup java -cp graph-0.0.4-SNAPSHOT.jar:neo4j-java-driver-1.1.1.jar org.obrienscience.graph.ForkJoinGraphServer 3 6 18 1 password 7687 127.0.0.1 1 19 1


Pagination - Server mode


    private void queryPage() {
    Driver driver = GraphDatabase.driver("bolt://" + ip + ":" + port,  AuthTokens.basic(username, pass)); 
    try (Session session = driver.session()) {
    try (Transaction aTransaction = session.beginTransaction()) {
    StatementResult result  = aTransaction.run(
    //new StringBuffer("MATCH (a:Node0 {name: {p0}})  return a").toString(),
    //new StringBuffer("MATCH (a) return a AS name ORDER BY name DESC LIMIT 10").toString(), // no
    new StringBuffer("MATCH (a) return a SKIP 10 LIMIT 10").toString(),
    parameters( "p0", 0));
    aTransaction.success();
    if(result.hasNext()) {
    System.out.println("Size: " + result.keys().size());
    while(result.hasNext()) {
    System.out.println(new StringBuffer(String.valueOf(System.currentTimeMillis())).append(",").append(result.next())
    .toString());
    }
    }
    }
    driver.close();
    System.out.println(System.currentTimeMillis() + ", end");
    }
    }

Pagination - Embedded mode



Notes

https://neo4j.com/docs/developer-manual/current/get-started/cypher/getting-the-results-you-want/

No comments:

Post a Comment