Netflix Notebook
In [2]:
%%bash
# Stop at the first failing command. %%bash only reports the exit status of
# the last command, so without this a failed download is hidden by `ls`.
set -euo pipefail

# Show which interpreter the notebook environment resolves to.
# `command -v` is the shell builtin equivalent of the external `which`.
command -v python

# Get Data
sh ../netflix/input/download_data.sh

# List the working directory so the downloaded files are visible.
ls
/home/kiran/.miniconda3/envs/dgraphpandas/bin/python Downloading DataSet Downloading netflix-shows.zip to /home/kiran/projects/dgraphpandas/samples/notebooks Archive: netflix-shows.zip inflating: netflix_titles.csv NetflixSample.ipynb PlanetSample.ipynb netflix_titles.csv
100%|██████████| 1.18M/1.18M [00:00<00:00, 4.68MB/s]
In [3]:
%%bash
# Bring the DGraph stack up in detached mode, then show the running containers.
# (Remember to shut the containers down at the end of the notebook.)
compose_file=../../docker-compose.yml
docker-compose -f "${compose_file}" up -d
docker container ls
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 4afa67c17f8f dgraph/dgraph:latest "dgraph-ratel" 1 second ago Up Less than a second 8080/tcp, 0.0.0.0:8000->8000/tcp, 9080/tcp dgraphpandas_ratel_1 fa0b28998453 dgraph/dgraph:latest "dgraph alpha --my=a…" 1 second ago Up Less than a second 0.0.0.0:8080->8080/tcp, 0.0.0.0:9080->9080/tcp dgraphpandas_alpha_1 b0c1a1610a66 dgraph/dgraph:latest "dgraph zero --my=ze…" 1 second ago Up Less than a second 0.0.0.0:5080->5080/tcp, 8080/tcp, 0.0.0.0:6080->6080/tcp, 9080/tcp dgraphpandas_zero_1
Creating network "dgraphpandas_default" with the default driver Creating dgraphpandas_zero_1 ... Creating dgraphpandas_alpha_1 ... Creating dgraphpandas_ratel_1 ... Creating dgraphpandas_ratel_1 ... done Creating dgraphpandas_alpha_1 ... done Creating dgraphpandas_zero_1 ... done
In [4]:
%%bash
# Stop at the first failing command. %%bash only reports the exit status of
# the last command, so without this a failed install or clean-up run would be
# hidden by the trailing `ls`.
set -euo pipefail

# Install the library into the interpreter that `python` resolves to.
python -m pip install dgraphpandas

# Run the sample's clean-up script. Judging by the recorded cell output it
# writes the per-entity CSV files (cast.csv, directors.csv, genre.csv, ...)
# into the current directory.
python ../netflix/input/download_cleanup.py

# List the working directory so the generated files are visible.
ls
Requirement already satisfied: dgraphpandas in /home/kiran/.miniconda3/envs/dgraphpandas/lib/python3.6/site-packages (0.0.4) Requirement already satisfied: pandas in /home/kiran/.miniconda3/envs/dgraphpandas/lib/python3.6/site-packages (from dgraphpandas) (1.1.5) Requirement already satisfied: numpy>=1.15.4 in /home/kiran/.miniconda3/envs/dgraphpandas/lib/python3.6/site-packages (from pandas->dgraphpandas) (1.19.5) Requirement already satisfied: python-dateutil>=2.7.3 in /home/kiran/.miniconda3/envs/dgraphpandas/lib/python3.6/site-packages (from pandas->dgraphpandas) (2.8.1) Requirement already satisfied: pytz>=2017.2 in /home/kiran/.miniconda3/envs/dgraphpandas/lib/python3.6/site-packages (from pandas->dgraphpandas) (2021.1) Requirement already satisfied: six>=1.5 in /home/kiran/.miniconda3/envs/dgraphpandas/lib/python3.6/site-packages (from python-dateutil>=2.7.3->pandas->dgraphpandas) (1.15.0) Generating title types Generating directors Generating cast Generating rating Generating genre NetflixSample.ipynb PlanetSample.ipynb cast.csv directors.csv genre.csv netflix_titles.csv rating.csv show_types.csv
In [5]:
%%bash
# Generate Upserts
# Stop at the first failing transform instead of carrying on to the next file.
set -euo pipefail

# Flags passed to dgraphpandas:
#   -c   path to the JSON config shared by every file
#   -ck  key of the entry under "files" in that config to apply
#   -f   CSV file to transform
#   --console  presumably echoes the resulting frames to the cell output
#              (TODO confirm against the dgraphpandas CLI help)
config=../netflix/dgraphpandas.json
python -m dgraphpandas -c "${config}" -ck director -f directors.csv --console
python -m dgraphpandas -c "${config}" -ck title -f netflix_titles.csv --console
Intrinsic: subject predicate object type 0 director_JorgeMichelGrau identifier Jorge Michel Grau <xs:string> 1 director_GilbertChan identifier Gilbert Chan <xs:string> 2 director_ShaneAcker identifier Shane Acker <xs:string> 3 director_RobertLuketic identifier Robert Luketic <xs:string> 4 director_SerdarAkar identifier Serdar Akar <xs:string> ... ... ... ... ... 6105 director_RodrigoGuardiola dgraph.type director <xs:string> 6106 director_GabrielCruzRivas dgraph.type director <xs:string> 6111 director_PeterHewitt dgraph.type director <xs:string> 6112 director_JosefFares dgraph.type director <xs:string> 6113 director_MozezSingh dgraph.type director <xs:string> [10593 rows x 4 columns] Edges: Empty DataFrame Columns: [subject, predicate, object, type] Index: [] Intrinsic: subject predicate object type 7787 title_s1 title 3% <xs:string> 7788 title_s2 title 7:19 <xs:string> 7789 title_s3 title 23:59 <xs:string> 7790 title_s4 title 9 <xs:string> 7791 title_s5 title 21 <xs:string> ... ... ... ... ... 46717 title_s7783 date_added 2020-10-19T00:00:00 <xs:dateTime> 46718 title_s7784 date_added 2019-03-02T00:00:00 <xs:dateTime> 46719 title_s7785 date_added 2020-09-25T00:00:00 <xs:dateTime> 46720 title_s7786 date_added 2020-10-31T00:00:00 <xs:dateTime> 46721 title_s7787 date_added 2020-03-01T00:00:00 <xs:dateTime> [46712 rows x 4 columns] Edges: subject predicate object type 0 title_s1 type type_TVShow None 1 title_s2 type type_Movie None 2 title_s3 type type_Movie None 3 title_s4 type type_Movie None 4 title_s5 type type_Movie None ... ... ... ... ... 77867 title_s7785 genre genre_Music&Musicals None 77868 title_s7786 genre genre_InternationalTVShows None 77868 title_s7786 genre genre_RealityTV None 77869 title_s7787 genre genre_Documentaries None 77869 title_s7787 genre genre_Music&Musicals None [103775 rows x 4 columns]
2021-04-04 16:39:16 DESKTOP-4HNUNO0 __main__[4076] DEBUG Global Config {'files': {'cast': {'pre_rename': {'cast': 'identifier'}, 'subject_fields': ['id']}, 'director': {'pre_rename': {'director': 'identifier'}, 'subject_fields': ['id']}, 'genre': {'pre_rename': {'genre': 'identifier'}, 'subject_fields': ['id']}, 'rating': {'pre_rename': {'rating': 'identifier'}, 'subject_fields': ['id']}, 'show_types': {'pre_rename': {'type': 'identifier'}, 'subject_fields': ['id']}, 'title': {'csv_edges': ['director', 'cast', 'genre', 'country'], 'edge_fields': ['type', 'director', 'cast', 'country', 'rating', 'genre'], 'pre_rename': {'listed_in': 'genre'}, 'subject_fields': ['show_id'], 'type_overrides': {'cast': 'object', 'country': 'object', 'date_added': 'datetime64', 'description': 'object', 'director': 'object', 'duration': 'object', 'listed_in': 'object', 'rating': 'object', 'release_year': 'int32', 'title': 'object', 'type': 'object'}}}, 'transform': 'horizontal'} 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4076] DEBUG Reading file directors.csv 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4076] DEBUG Applying Type Overrides {} 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4076] DEBUG Melting frame with subject: ['id'] 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Joining Key fields ['id'] to subject 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Dropping keys in favour of subject 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Adding dgraph.type fields 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG No Edges defined, Skipping. 
2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Applying RDF Types 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Ensuring Date Time fields are in ISO format 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Compiling Illegal Characters ['%', '\\.', '\\s', '"', '\\n', '\\r\\n'] 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Resolving illegal_characters 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Compiling Illegal Characters ['"', '\\n', '\\r\\n'] 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Resolving illegal_characters 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Compiling Illegal Characters ['%', '\\.', '\\s', '"', '\\n', '\\r\\n'] 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Resolving illegal_characters 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Compiling Illegal Characters ['%', '\\.', '\\s', '"', '\\n', '\\r\\n'] 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Resolving illegal_characters 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Dropping records where NA on object 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4076] DEBUG Dropping records where NA on object 2021-04-04 16:39:16 DESKTOP-4HNUNO0 __main__[4076] DEBUG Concatting 1 intrinsic frames and 1 frames 2021-04-04 16:39:16 DESKTOP-4HNUNO0 __main__[4076] INFO Generating Upsert Queries 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.writers.upserts[4076] DEBUG Dropping NA Objects from intrinsic 2021-04-04 16:39:16 DESKTOP-4HNUNO0 __main__[4076] INFO Writing to 10593 upserts to ./directors_intrinsic.gz 2021-04-04 
16:39:16 DESKTOP-4HNUNO0 __main__[4076] INFO Writing to 0 upserts to ./directors_edges.gz 2021-04-04 16:39:16 DESKTOP-4HNUNO0 __main__[4080] DEBUG Global Config {'files': {'cast': {'pre_rename': {'cast': 'identifier'}, 'subject_fields': ['id']}, 'director': {'pre_rename': {'director': 'identifier'}, 'subject_fields': ['id']}, 'genre': {'pre_rename': {'genre': 'identifier'}, 'subject_fields': ['id']}, 'rating': {'pre_rename': {'rating': 'identifier'}, 'subject_fields': ['id']}, 'show_types': {'pre_rename': {'type': 'identifier'}, 'subject_fields': ['id']}, 'title': {'csv_edges': ['director', 'cast', 'genre', 'country'], 'edge_fields': ['type', 'director', 'cast', 'country', 'rating', 'genre'], 'pre_rename': {'listed_in': 'genre'}, 'subject_fields': ['show_id'], 'type_overrides': {'cast': 'object', 'country': 'object', 'date_added': 'datetime64', 'description': 'object', 'director': 'object', 'duration': 'object', 'listed_in': 'object', 'rating': 'object', 'release_year': 'int32', 'title': 'object', 'type': 'object'}}}, 'transform': 'horizontal'} 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Reading file netflix_titles.csv 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Applying Type Overrides {'type': 'object', 'title': 'object', 'director': 'object', 'cast': 'object', 'country': 'object', 'date_added': 'datetime64', 'release_year': 'int32', 'rating': 'object', 'duration': 'object', 'listed_in': 'object', 'description': 'object'} 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting type to object 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting title to object 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting director to object 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting cast to object 2021-04-04 16:39:16 
DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting country to object 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting date_added to datetime64 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting release_year to int32 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting rating to object 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting duration to object 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting listed_in to object 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Converting description to object 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.horizontal[4080] DEBUG Melting frame with subject: ['show_id'] 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] INFO Detected csv_edges ['director', 'cast', 'genre', 'country']. 
Breaking up those columns 2021-04-04 16:39:16 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Joining Key fields ['show_id'] to subject 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Dropping keys in favour of subject 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Adding dgraph.type fields 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Splitting into Intrinsic and edges based on edges ['type', 'director', 'cast', 'country', 'rating', 'genre'] 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Applying RDF Types 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Ensuring Date Time fields are in ISO format 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Compiling Illegal Characters ['%', '\\.', '\\s', '"', '\\n', '\\r\\n'] 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Resolving illegal_characters 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Compiling Illegal Characters ['"', '\\n', '\\r\\n'] 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Resolving illegal_characters 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Compiling Illegal Characters ['%', '\\.', '\\s', '"', '\\n', '\\r\\n'] 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Resolving illegal_characters 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Compiling Illegal Characters ['%', '\\.', '\\s', '"', '\\n', '\\r\\n'] 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Resolving illegal_characters 2021-04-04 16:39:17 DESKTOP-4HNUNO0 
dgraphpandas.strategies.vertical_helpers[4080] DEBUG Dropping records where NA on object 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.strategies.vertical_helpers[4080] DEBUG Dropping records where NA on object 2021-04-04 16:39:17 DESKTOP-4HNUNO0 __main__[4080] DEBUG Concatting 1 intrinsic frames and 1 frames 2021-04-04 16:39:17 DESKTOP-4HNUNO0 __main__[4080] INFO Generating Upsert Queries 2021-04-04 16:39:17 DESKTOP-4HNUNO0 dgraphpandas.writers.upserts[4080] DEBUG Dropping NA Objects from intrinsic 2021-04-04 16:39:17 DESKTOP-4HNUNO0 __main__[4080] INFO Writing to 46712 upserts to ./netflix_titles_intrinsic.gz 2021-04-04 16:39:17 DESKTOP-4HNUNO0 __main__[4080] INFO Writing to 103775 upserts to ./netflix_titles_edges.gz
In [7]:
# List the gzipped upsert files written by the previous cell.
!ls *.gz
directors_edges.gz netflix_titles_edges.gz directors_intrinsic.gz netflix_titles_intrinsic.gz
In [14]:
# Ensure the `xid` predicate exists (string with an exact index) before running the upsert operations.
# If you get an auth error, take a look at the IP whitelist in the docker-compose file.
!curl -sX POST localhost:8080/alter -d 'xid: string @index(exact) .' | jq .
{ "data": { "code": "Success", "message": "Done" } }
In [15]:
%%bash
# Apply Upserts to Dgraph
# Omitting the rest of the files for brevity, but the steps are the same for all of them.

# Stop at the first failed load rather than continuing with the remaining files.
set -euo pipefail

# Every file goes through the same live-loader invocation; only --files
# changes. The files are loaded in the order listed.
for rdf_file in \
  directors_intrinsic.gz \
  directors_edges.gz \
  netflix_titles_intrinsic.gz \
  netflix_titles_edges.gz; do
  dgraph live --files "${rdf_file}" --upsertPredicate xid --xidmap xidmap --format rdf --batch 500
done
[Decoder]: Using assembly version of decoder Page Size: 4096 Running transaction with dgraph endpoint: 127.0.0.1:9080 Found 1 data file(s) to process Processing data file "directors_intrinsic.gz" Number of TXs run : 22 Number of N-Quads processed : 10593 Time spent : 514.0192ms N-Quads processed per second : 10593 [Decoder]: Using assembly version of decoder Page Size: 4096 Running transaction with dgraph endpoint: 127.0.0.1:9080 Found 1 data file(s) to process Processing data file "directors_edges.gz" Number of TXs run : 0 Number of N-Quads processed : 0 Time spent : 2.7456ms N-Quads processed per second : 0 [Decoder]: Using assembly version of decoder Page Size: 4096 Running transaction with dgraph endpoint: 127.0.0.1:9080 Found 1 data file(s) to process Processing data file "netflix_titles_intrinsic.gz" Number of TXs run : 94 Number of N-Quads processed : 46712 Time spent : 2.0965622s N-Quads processed per second : 23356 [Decoder]: Using assembly version of decoder Page Size: 4096 Running transaction with dgraph endpoint: 127.0.0.1:9080 Found 1 data file(s) to process Processing data file "netflix_titles_edges.gz" Number of TXs run : 208 Number of N-Quads processed : 103775 Time spent : 4.7730272s N-Quads processed per second : 25943
I0404 16:43:45.990922 4539 init.go:107] Dgraph version : v20.11.2 Dgraph codename : tchalla-2 Dgraph SHA-256 : 0153cb8d3941ad5ad107e395b347e8d930a0b4ead6f4524521f7a525a9699167 Commit SHA-1 : 94f3a0430 Commit timestamp : 2021-02-23 13:07:17 +0530 Branch : HEAD Go version : go1.15.5 jemalloc enabled : true For Dgraph official documentation, visit https://dgraph.io/docs/. For discussions about Dgraph , visit https://discuss.dgraph.io. Licensed variously under the Apache Public License 2.0 and Dgraph Community License. Copyright 2015-2020 Dgraph Labs, Inc. badger 2021/04/04 16:43:46 INFO: All 0 tables opened in 0s badger 2021/04/04 16:43:46 INFO: Discard stats nextEmptySlot: 0 badger 2021/04/04 16:43:46 INFO: Set nextTxnTs to 0 I0404 16:43:46.021243 4539 xidmap.go:128] Loaded up 0 xid to uid mappings I0404 16:43:46.535335 4539 xidmap.go:288] Writing xid map to DB I0404 16:43:46.535366 4539 xidmap.go:290] Finished writing xid map to DB badger 2021/04/04 16:43:46 INFO: Lifetime L0 stalled for: 0s badger 2021/04/04 16:43:46 INFO: Level 0 [ ]: NumTables: 00. Size: 0 B of 0 B. Score: 0.00->0.00 Target FileSize: 64 MiB Level 1 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 2 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 3 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 4 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 5 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 6 [B]: NumTables: 00. Size: 0 B of 10 MiB. 
Score: 0.00->0.00 Target FileSize: 2.0 MiB Level Done I0404 16:43:46.780294 4550 init.go:107] Dgraph version : v20.11.2 Dgraph codename : tchalla-2 Dgraph SHA-256 : 0153cb8d3941ad5ad107e395b347e8d930a0b4ead6f4524521f7a525a9699167 Commit SHA-1 : 94f3a0430 Commit timestamp : 2021-02-23 13:07:17 +0530 Branch : HEAD Go version : go1.15.5 jemalloc enabled : true For Dgraph official documentation, visit https://dgraph.io/docs/. For discussions about Dgraph , visit https://discuss.dgraph.io. Licensed variously under the Apache Public License 2.0 and Dgraph Community License. Copyright 2015-2020 Dgraph Labs, Inc. badger 2021/04/04 16:43:46 INFO: All 0 tables opened in 0s badger 2021/04/04 16:43:46 INFO: Discard stats nextEmptySlot: 0 badger 2021/04/04 16:43:46 INFO: Set nextTxnTs to 0 I0404 16:43:46.809251 4550 xidmap.go:128] Loaded up 0 xid to uid mappings I0404 16:43:46.812056 4550 xidmap.go:288] Writing xid map to DB I0404 16:43:46.812104 4550 xidmap.go:290] Finished writing xid map to DB badger 2021/04/04 16:43:46 INFO: Lifetime L0 stalled for: 0s badger 2021/04/04 16:43:46 INFO: Level 0 [ ]: NumTables: 00. Size: 0 B of 0 B. Score: 0.00->0.00 Target FileSize: 64 MiB Level 1 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 2 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 3 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 4 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 5 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 6 [B]: NumTables: 00. Size: 0 B of 10 MiB. 
Score: 0.00->0.00 Target FileSize: 2.0 MiB Level Done I0404 16:43:47.076154 4562 init.go:107] Dgraph version : v20.11.2 Dgraph codename : tchalla-2 Dgraph SHA-256 : 0153cb8d3941ad5ad107e395b347e8d930a0b4ead6f4524521f7a525a9699167 Commit SHA-1 : 94f3a0430 Commit timestamp : 2021-02-23 13:07:17 +0530 Branch : HEAD Go version : go1.15.5 jemalloc enabled : true For Dgraph official documentation, visit https://dgraph.io/docs/. For discussions about Dgraph , visit https://discuss.dgraph.io. Licensed variously under the Apache Public License 2.0 and Dgraph Community License. Copyright 2015-2020 Dgraph Labs, Inc. badger 2021/04/04 16:43:47 INFO: All 0 tables opened in 0s badger 2021/04/04 16:43:47 INFO: Discard stats nextEmptySlot: 0 badger 2021/04/04 16:43:47 INFO: Set nextTxnTs to 0 badger 2021/04/04 16:43:47 INFO: Deleting empty file: xidmap/000001.vlog I0404 16:43:47.094656 4562 xidmap.go:128] Loaded up 0 xid to uid mappings I0404 16:43:49.191277 4562 xidmap.go:288] Writing xid map to DB I0404 16:43:49.191312 4562 xidmap.go:290] Finished writing xid map to DB badger 2021/04/04 16:43:49 INFO: Lifetime L0 stalled for: 0s badger 2021/04/04 16:43:49 INFO: Level 0 [ ]: NumTables: 00. Size: 0 B of 0 B. Score: 0.00->0.00 Target FileSize: 64 MiB Level 1 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 2 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 3 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 4 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 5 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 6 [B]: NumTables: 00. Size: 0 B of 10 MiB. 
Score: 0.00->0.00 Target FileSize: 2.0 MiB Level Done I0404 16:43:49.429345 4573 init.go:107] Dgraph version : v20.11.2 Dgraph codename : tchalla-2 Dgraph SHA-256 : 0153cb8d3941ad5ad107e395b347e8d930a0b4ead6f4524521f7a525a9699167 Commit SHA-1 : 94f3a0430 Commit timestamp : 2021-02-23 13:07:17 +0530 Branch : HEAD Go version : go1.15.5 jemalloc enabled : true For Dgraph official documentation, visit https://dgraph.io/docs/. For discussions about Dgraph , visit https://discuss.dgraph.io. Licensed variously under the Apache Public License 2.0 and Dgraph Community License. Copyright 2015-2020 Dgraph Labs, Inc. badger 2021/04/04 16:43:49 INFO: All 0 tables opened in 0s badger 2021/04/04 16:43:49 INFO: Discard stats nextEmptySlot: 0 badger 2021/04/04 16:43:49 INFO: Set nextTxnTs to 0 badger 2021/04/04 16:43:49 INFO: Deleting empty file: xidmap/000002.vlog I0404 16:43:49.450931 4573 xidmap.go:128] Loaded up 0 xid to uid mappings I0404 16:43:54.224058 4573 xidmap.go:288] Writing xid map to DB I0404 16:43:54.224086 4573 xidmap.go:290] Finished writing xid map to DB badger 2021/04/04 16:43:54 INFO: Lifetime L0 stalled for: 0s badger 2021/04/04 16:43:54 INFO: Level 0 [ ]: NumTables: 00. Size: 0 B of 0 B. Score: 0.00->0.00 Target FileSize: 64 MiB Level 1 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 2 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 3 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 4 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 5 [ ]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level 6 [B]: NumTables: 00. Size: 0 B of 10 MiB. Score: 0.00->0.00 Target FileSize: 2.0 MiB Level Done
In [16]:
%%bash
# Fail the cell if any request fails. Without pipefail the pipeline's status
# is jq's, so a curl error (e.g. server not reachable) would go unnoticed.
set -euo pipefail

echo Applying Reverse Edges

# POST one schema line to the alter endpoint and pretty-print the JSON reply.
# Arguments: $1 - schema definition to send as the request body
# -S keeps curl quiet on success (-s) but still prints an error on failure.
alter_schema() {
  curl -sS -X POST localhost:8080/alter -d "$1" | jq .
}

alter_schema 'cast: [uid] @reverse .'
alter_schema 'genre: [uid] @reverse .'
alter_schema 'director: [uid] @reverse .'
alter_schema 'rating: [uid] @reverse .'

# Not a reverse edge: this one adds a term index on the description predicate.
alter_schema 'description: string @index(term) .'
Applying Reverse Edges { "data": { "code": "Success", "message": "Done" } } { "data": { "code": "Success", "message": "Done" } } { "data": { "code": "Success", "message": "Done" } } { "data": { "code": "Success", "message": "Done" } } { "data": { "code": "Success", "message": "Done" } }
In [17]:
%%bash
# Example Query
# Fetch ten director nodes and, through the ~director reverse edge, up to five
# title nodes pointing at each of them.

# Fail the cell if the request fails. Without pipefail the pipeline's status
# is jq's, so a curl error would go unnoticed.
set -euo pipefail

# -S keeps curl quiet on success (-s) but still prints an error on failure.
curl -sS -X POST localhost:8080/query -H 'Content-Type: application/dql' -d '
{
directors(func: type(director), first: 10)
{
identifier
titles: ~director(first: 5) @filter(type(title))
{
title
date_added
release_year
}
}
}
' | jq .
{ "data": { "directors": [ { "identifier": "Alexis Morante", "titles": [ { "title": "Alejandro Sanz: What I Was Is What I Am", "date_added": "2018-08-18T00:00:00Z", "release_year": 2018 }, { "title": "Camarón: The Film", "date_added": "2018-07-18T00:00:00Z", "release_year": 2018 } ] }, { "identifier": "Ishaan Trivedi", "titles": [ { "title": "A Scandall", "date_added": "2017-10-15T00:00:00Z", "release_year": 2016 } ] }, { "identifier": "Brendon Marotta", "titles": [ { "title": "American Circumcision", "date_added": "2018-12-16T00:00:00Z", "release_year": 2017 } ] }, { "identifier": "Alfonso Serrano Maturino", "titles": [ { "title": "Acapulco La vida va", "date_added": "2017-06-03T00:00:00Z", "release_year": 2016 } ] }, { "identifier": "Pedro Coutinho", "titles": [ { "title": "All The Reasons To Forget", "date_added": "2018-04-19T00:00:00Z", "release_year": 2017 } ] }, { "identifier": "Scott Christian Sava", "titles": [ { "title": "Animal Crackers", "date_added": "2020-07-24T00:00:00Z", "release_year": 2020 } ] }, { "identifier": "Brian Smrz", "titles": [ { "title": "24 Hours to Live", "date_added": "2018-04-07T00:00:00Z", "release_year": 2017 } ] }, { "identifier": "Chris Blake", "titles": [ { "title": "All Light Will End", "date_added": "2019-02-02T00:00:00Z", "release_year": 2018 } ] }, { "identifier": "Junichi Sato", "titles": [ { "title": "A Whisker Away", "date_added": "2020-06-18T00:00:00Z", "release_year": 2020 } ] }, { "identifier": "Rusty Nixon", "titles": [ { "title": "A.M.I.", "date_added": "2020-10-01T00:00:00Z", "release_year": 2019 } ] } ] }, "extensions": { "server_latency": { "parsing_ns": 59600, "processing_ns": 4522800, "encoding_ns": 102900, "assign_timestamp_ns": 779700, "total_ns": 5532400 }, "txn": { "start_ts": 181053 }, "metrics": { "num_uids": { "_total": 64, "date_added": 11, "dgraph.type": 11, "identifier": 10, "release_year": 11, "title": 11, "~director": 10 } } } }
In [18]:
%%bash
# Tear down the DGraph stack described by the compose file.
compose_file=../../docker-compose.yml
docker-compose -f "${compose_file}" down
Stopping dgraphpandas_ratel_1 ... Stopping dgraphpandas_alpha_1 ... Stopping dgraphpandas_zero_1 ... Stopping dgraphpandas_ratel_1 ... done Stopping dgraphpandas_alpha_1 ... done Stopping dgraphpandas_zero_1 ... done Removing dgraphpandas_ratel_1 ... Removing dgraphpandas_alpha_1 ... Removing dgraphpandas_zero_1 ... Removing dgraphpandas_ratel_1 ... done Removing dgraphpandas_alpha_1 ... done Removing dgraphpandas_zero_1 ... done Removing network dgraphpandas_default
In [19]:
%%bash
# Clean up
# Remove the CSV files, the gzipped upsert files and the directories left
# behind in the working directory (xidmap, and t, which is presumably the
# live loader's temp dir; TODO confirm).
#   -f  keeps the cell re-runnable: no error when nothing is left to delete
#   --  stops a file name starting with '-' from being parsed as an option
cleanup_artifacts() {
  rm -f -- *.csv *.gz
  rm -rf -- t xidmap
}
cleanup_artifacts