diff --git a/.env b/.env
new file mode 100644
index 0000000..11039c0
--- /dev/null
+++ b/.env
@@ -0,0 +1,2 @@
+MONGO_INITDB_ROOT_USERNAME=root
+MONGO_INITDB_ROOT_PASSWORD=vinay
\ No newline at end of file
diff --git a/docker-compose.yaml b/docker-compose.yaml
index d315482..e849c9e 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,9 +1,16 @@
-version: '3.9'
+version: '3.8'
 
 services:
   mongodb:
     image: mongo:latest
+    container_name: mongodb
+    restart: always
+    env_file:
+      - .env
     ports:
-      - 27017:27017
+      - "27017:27017"
     volumes:
-      - ~/mongodb/mongo:/data/db
\ No newline at end of file
+      - mongodb_data:/data/db
+
+volumes:
+  mongodb_data:
diff --git a/playground-3.mongodb.js b/playground-3.mongodb.js
new file mode 100644
index 0000000..ba8d8e9
--- /dev/null
+++ b/playground-3.mongodb.js
@@ -0,0 +1,8 @@
+
+
+
+
+use("vinay");
+db.accounts.find(
+{}
+);
\ No newline at end of file
diff --git a/sample_analytics/accounts.json b/sample_analytics/accounts.json
index baf9e2c..812be4f 100644
--- a/sample_analytics/accounts.json
+++ b/sample_analytics/accounts.json
@@ -1744,3 +1744,4 @@
 {"_id":{"$oid":"5ca4bbc7a2dd94ee58162a5e"},"account_id":{"$numberInt":"684319"},"limit":{"$numberInt":"10000"},"products":["InvestmentFund","InvestmentStock"]}
 {"_id":{"$oid":"5ca4bbc7a2dd94ee58162a5f"},"account_id":{"$numberInt":"351063"},"limit":{"$numberInt":"10000"},"products":["InvestmentStock"]}
 {"_id":{"$oid":"5ca4bbc7a2dd94ee58162a60"},"account_id":{"$numberInt":"291224"},"limit":{"$numberInt":"10000"},"products":["Commodity","InvestmentStock"]}
+{"_id":{"$oid":"5ca4bbc7a2dd94ee58162a60"},"account_id":{"$numberInt":"291224"},"limit":{"$numberInt":"10000"},"products":["Commodity","InvestmentStock"]}
diff --git a/sample_analytics/customers.json b/sample_analytics/customers.json
index 7c259c9..86af409 100644
--- a/sample_analytics/customers.json
+++ b/sample_analytics/customers.json
@@ -498,3 +498,9 @@
 {"_id":{"$oid":"5ca4bbcea2dd94ee58162c5c"},"username":"smcintyre","name":"Christopher Lawrence","address":"00881 West Flat\nNorth Emily, IL 32130","birthdate":{"$date":{"$numberLong":"857586057000"}},"email":"vkeith@yahoo.com","accounts":[{"$numberInt":"551774"},{"$numberInt":"264502"},{"$numberInt":"599670"},{"$numberInt":"193228"},{"$numberInt":"397774"}],"tier_and_details":{}}
 {"_id":{"$oid":"5ca4bbcea2dd94ee58162c5d"},"username":"qknight","name":"Gabriel Romero","address":"79375 David Neck\nWest Matthewton, NJ 92863","birthdate":{"$date":{"$numberLong":"42240010000"}},"email":"erica98@gmail.com","accounts":[{"$numberInt":"568852"},{"$numberInt":"351063"},{"$numberInt":"635650"},{"$numberInt":"229182"},{"$numberInt":"732327"},{"$numberInt":"89698"}],"tier_and_details":{}}
 {"_id":{"$oid":"5ca4bbcea2dd94ee58162c5e"},"username":"ecasey","name":"Brandon Contreras","address":"6942 Connie Skyway\nPatrickville, WA 16551","birthdate":{"$date":{"$numberLong":"120268330000"}},"email":"amber97@hotmail.com","accounts":[{"$numberInt":"896364"},{"$numberInt":"450464"}],"tier_and_details":{"f4cebafe5530421b991303dff297643d":{"tier":"Platinum","benefits":["shopping discounts"],"active":true,"id":"f4cebafe5530421b991303dff297643d"}}}
+{"_id":{"$oid":"5ca4bbcea2dd94ee58162c59"},"username":"davidsonomar","name":"Linda Stephens","address":"399 Fuentes Roads\nJoshuaborough, CO 64522","birthdate":{"$date":{"$numberLong":"528484365000"}},"email":"cynthia31@hotmail.com","accounts":[{"$numberInt":"669413"}],"tier_and_details":{}}
+{"_id":{"$oid":"5ca4bbcea2dd94ee58162c5a"},"username":"amandawilliams","name":"Brandy Huang","address":"9505 Melissa Streets\nSouth Frankville, NJ 91189","birthdate":{"$date":{"$numberLong":"180627718000"}},"email":"scottjonathan@yahoo.com","accounts":[{"$numberInt":"650729"},{"$numberInt":"991663"},{"$numberInt":"144876"},{"$numberInt":"912504"},{"$numberInt":"88163"}],"tier_and_details":{}} +{"_id":{"$oid":"5ca4bbcea2dd94ee58162c5b"},"username":"stricklandjeffery","name":"Xavier Myers","address":"499 Jonathan Streets Apt. 890\nEast Ashley, MD 76825","birthdate":{"$date":{"$numberLong":"562100715000"}},"email":"fredsmith@yahoo.com","accounts":[{"$numberInt":"285957"},{"$numberInt":"875868"},{"$numberInt":"138703"},{"$numberInt":"122908"},{"$numberInt":"370468"}],"tier_and_details":{}} +{"_id":{"$oid":"5ca4bbcea2dd94ee58162c5c"},"username":"smcintyre","name":"Christopher Lawrence","address":"00881 West Flat\nNorth Emily, IL 32130","birthdate":{"$date":{"$numberLong":"857586057000"}},"email":"vkeith@yahoo.com","accounts":[{"$numberInt":"551774"},{"$numberInt":"264502"},{"$numberInt":"599670"},{"$numberInt":"193228"},{"$numberInt":"397774"}],"tier_and_details":{}} +{"_id":{"$oid":"5ca4bbcea2dd94ee58162c5d"},"username":"qknight","name":"Gabriel Romero","address":"79375 David Neck\nWest Matthewton, NJ 92863","birthdate":{"$date":{"$numberLong":"42240010000"}},"email":"erica98@gmail.com","accounts":[{"$numberInt":"568852"},{"$numberInt":"351063"},{"$numberInt":"635650"},{"$numberInt":"229182"},{"$numberInt":"732327"},{"$numberInt":"89698"}],"tier_and_details":{}} +{"_id":{"$oid":"5ca4bbcea2dd94ee58162c5e"},"username":"ecasey","name":"Brandon Contreras","address":"6942 Connie Skyway\nPatrickville, WA 16551","birthdate":{"$date":{"$numberLong":"120268330000"}},"email":"amber97@hotmail.com","accounts":[{"$numberInt":"896364"},{"$numberInt":"450464"}],"tier_and_details":{"f4cebafe5530421b991303dff297643d":{"tier":"Platinum","benefits":["shopping discounts"],"active":true,"id":"f4cebafe5530421b991303dff297643d"}}} diff --git a/script.sh b/script.sh deleted file mode 100755 index 6b70c77..0000000 --- a/script.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# vim:sw=4:ts=4:et:ai:ci:sr:nu:syntax=sh -############################################################## -# Usage ( * = optional ): # -# ./script.sh * * * * # -############################################################## - -if [ ! -z "$3" ]; then - if [ ! -z "$4" ]; then - echo "Using password authentication!" 
- auth="--authenticationDatabase admin -u $3 -p $4" - fi -fi - -HOST=${1:-localhost} # default server is the localhost -PORT=${2:-27017} # default port for MongoDB is 27017 - -for directory in *; do - if [ -d "${directory}" ] ; then - echo "$directory" - for data_file in $directory/*; do - mongoimport --drop --host $HOST --port $PORT --db "$directory" --collection "$(basename $data_file .json)" --file $data_file $auth - done - fi -done diff --git a/work/import.sh b/work/import.sh new file mode 100644 index 0000000..c2904ac --- /dev/null +++ b/work/import.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +function import_collection { + local collections=("$@") + # cd "$dir" + # echo "$LOGNAME" + > /workspaces/mongodb-sample-dataset/work/mongo.log + docker exec -it mongodb mkdir -p vinay + for ((i = 0; i < ${#collections[@]}; i++)); do + # echo "${json_files[$i]}" + collection="${collections[$i]}" + json_file="${json_files[$i]}.json" + # docker cp "$dir/$json_file" mongodb:"/$LOGNAME/" + # docker exec -it mongodb mongoimport --host localhost --port 27017 --db vinay --collection "$collection" --file "/$LOGNAME/$json_file" --drop + + docker cp "$dir/$json_file" mongodb:"/vinay/" >> /workspaces/mongodb-sample-dataset/work/mongo.log 2>&1 + docker exec -it mongodb mongoimport --host localhost --port 27017 --db vinay --collection "$collection" --file "/vinay/$json_file" --drop >> /workspaces/mongodb-sample-dataset/work/mongo.log 2>&1 + echo $import_log >> /workspaces/mongodb-sample-dataset/work/mongo.log + if [ $? -eq 0 ]; then + echo "Import successful for collection '$collection'." + # docker exec -it mongodb rm -rf "/$LOGNAME/$json_file" + # docker exec -it mongodb rm -rf "/vinay/$json_file" + else + echo "Error importing data into collection '$collection'." + fi + sleep 1 + done + docker exec -it mongodb rm -rf /vinay/ +} + + + +echo "Hey Hi $LOGNAME, do you want to import a MongoDB collection?" +echo "sample_analyticsmongodata: Enter 1" +echo "sample_geospatialmongodata: Enter 2" +echo "sample_mflixmongodata: Enter 3" +echo "exit: Enter 4" +while true; do + read -rp "Enter your choice: " action + case "$action" in + 1) + # List of collection as well JSON files + collections=( + accounts + customers + transactions + ) + json_files=( + accounts + customers + transactions + ) + dir="/workspaces/mongodb-sample-dataset/sample_analytics" + import_collection "${collections[@]}" + break + ;; + 2) + # List of collection names and corresponding JSON files + collections=( + shipwrecks + ) + + json_files=( + shipwrecks + ) + dir="/workspaces/mongodb-sample-dataset/sample_geospatial" + import_collection "${collections[@]}" + break + ;; + 3) + # List of collection names and corresponding JSON files + collections=( + comments + movies + sessions + ) + json_files=( + comments + movies + sessions + ) + dir="/workspaces/mongodb-sample-dataset/sample_mflix" + import_collection "${collections[@]}" + break + ;; + 4) + break + ;; + *) + echo "Hi $LOGNAME You entered $action. Wrong input. Please enter 1, 2, or 3 || 4." 
+ ;; + esac +done +go build /workspaces/mongodb-sample-dataset/work/main.go +./main + +# docker cp "/workspaces/mongodb-sample-dataset/sample_geospatial/shipwrecks.json" mongodb:/vinay/ \ No newline at end of file diff --git a/work/main b/work/main new file mode 100755 index 0000000..a0d7f52 Binary files /dev/null and b/work/main differ diff --git a/work/main.go b/work/main.go new file mode 100644 index 0000000..da646c8 --- /dev/null +++ b/work/main.go @@ -0,0 +1,289 @@ +// package main + +// import ( +// "bufio" +// "encoding/json" +// "fmt" +// "log" +// "os" +// "regexp" +// "strings" +// ) + +// type LogEntry struct { +// Timestamp string `json:"timestamp"` +// Message string `json:"message"` +// Duplicate ObjectIdEntry `json:"duplicate"` +// } + +// type ObjectIdEntry struct { +// Key string `json:"key"` +// Value string `json:"value"` +// } + +// func main() { +// // Open the MongoDB log file +// filePath := "mongo.log" +// file, err := os.Open("mongo.log") +// if err != nil { +// log.Fatal(err) +// } +// defer file.Close() + +// // Create a slice to store log entries +// var logEntries []LogEntry + +// // Create a scanner to read the file line by line +// scanner := bufio.NewScanner(file) + +// for scanner.Scan() { +// line := scanner.Text() +// // Split the line by tab to separate timestamp and message +// parts := strings.Split(line, "\t") +// if len(parts) != 2 { +// continue // Skip lines that don't match the expected format +// } + +// entry := LogEntry{ +// Timestamp: parts[0], +// Message: parts[1], +// } +// logEntries = append(logEntries, entry) +// } + +// if err := scanner.Err(); err != nil { +// log.Fatal(err) +// } +// // fmt.Println(logEntries) + +// regex := regexp.MustCompile(`([^:]+): ObjectId\('([^']+)'\)`) + +// // Extract key-value pairs containing ObjectId from log entries +// var objectIdEntries []ObjectIdEntry +// for _, v := range logEntries { +// if strings.Contains(v.Message, "_id_") { +// matches := regex.FindAllStringSubmatch(v.Message, -1) +// for _, match := range matches { +// if len(match) >= 3 { +// entry := ObjectIdEntry{ +// Key: match[1], +// Value: match[2], +// } +// v.Duplicate.Key = entry.Key +// v.Duplicate.Value = entry.Value +// objectIdEntries = append(objectIdEntries, entry) +// } +// } +// } +// } +// fmt.Println(objectIdEntries) + +// // Convert log entries to JSON +// jsonData, err := json.Marshal(logEntries) +// if err != nil { +// log.Fatal(err) +// } + +// err = os.WriteFile(filePath, jsonData, 0644) +// if err != nil { +// log.Fatal(err) +// } + +// // Print the JSON data +// // fmt.Println(string(jsonData)) +// } + + + +// package main + +// import ( +// "bufio" +// "encoding/json" +// "log" +// "os" +// "regexp" +// "strings" +// ) + +// type LogEntry struct { +// Timestamp string `json:"timestamp"` +// Message string `json:"message"` +// Duplicate []ObjectIdEntry `json:"duplicate,omitempty"` +// } + +// type ObjectIdEntry struct { +// Key string `json:"key"` +// Value string `json:"value"` +// } + +// func main() { +// // Open the MongoDB log file +// filePath := "mongo.log" +// file, err := os.Open("mongo.log") +// if err != nil { +// log.Fatal(err) +// } +// defer file.Close() + +// // Create a slice to store log entries +// var logEntries []LogEntry + +// // Create a scanner to read the file line by line +// scanner := bufio.NewScanner(file) + +// for scanner.Scan() { +// line := scanner.Text() +// // Split the line by tab to separate timestamp and message +// parts := strings.Split(line, "\t") +// if len(parts) != 2 { +// 
continue // Skip lines that don't match the expected format +// } + +// entry := LogEntry{ +// Timestamp: parts[0], +// Message: parts[1], +// } + +// // Initialize the Duplicate field as an empty slice +// entry.Duplicate = []ObjectIdEntry{} + +// logEntries = append(logEntries, entry) +// } + +// if err := scanner.Err(); err != nil { +// log.Fatal(err) +// } + +// regex := regexp.MustCompile(`([^:]+): ObjectId\('([^']+)'\)`) + +// // Extract key-value pairs containing ObjectId from log entries +// for i := range logEntries { +// if strings.Contains(logEntries[i].Message, "_id_") { +// matches := regex.FindAllStringSubmatch(logEntries[i].Message, -1) +// for _, match := range matches { +// if len(match) >= 3 { +// entry := ObjectIdEntry{ +// Key: match[1], +// Value: match[2], +// } +// logEntries[i]. +// logEntries[i].Duplicate = append(logEntries[i].Duplicate, entry) +// } +// } +// } +// } +// // fmt.Println(logEntries) + +// // Convert log entries to JSON +// jsonData, err := json.Marshal(logEntries) +// if err != nil { +// log.Fatal(err) +// } + +// err = os.WriteFile(filePath, jsonData, 0644) +// if err != nil { +// log.Fatal(err) +// } + +// // Print the JSON data +// // fmt.Println(string(jsonData)) +// } + + + +package main + +import ( + "bufio" + "encoding/json" + "log" + "os" + "regexp" + "strings" +) + +type LogEntry struct { + Timestamp string `json:"timestamp"` + Message string `json:"message"` + Duplicate []ObjectIdEntry `json:"duplicate,omitempty"` +} + +type ObjectIdEntry struct { + Key string `json:"key"` + Value string `json:"value"` +} + +func main() { + // Open the MongoDB log file + filePath := "mongo.log" + file, err := os.Open("mongo.log") + if err != nil { + log.Fatal(err) + } + defer file.Close() + + // Create a slice to store log entries + var logEntries []LogEntry + + // Create a scanner to read the file line by line + scanner := bufio.NewScanner(file) + + for scanner.Scan() { + line := scanner.Text() + // Split the line by tab to separate timestamp and message + parts := strings.Split(line, "\t") + if len(parts) != 2 { + continue // Skip lines that don't match the expected format + } + + entry := LogEntry{ + Timestamp: parts[0], + Message: parts[1], + } + + // Initialize the Duplicate field as an empty slice + entry.Duplicate = []ObjectIdEntry{} + + logEntries = append(logEntries, entry) + } + + if err := scanner.Err(); err != nil { + log.Fatal(err) + } + + regex := regexp.MustCompile(`([^:]+): ObjectId\('([^']+)'\)`) + // regex2 := regexp.MustCompile(`continuing through error: ([^:]+): ObjectId\('([^']+)'\)`) + + // Extract key-value pairs containing ObjectId from log entries + for i := range logEntries { + if strings.Contains(logEntries[i].Message, "_id_") { + + matches := regex.FindAllStringSubmatch(logEntries[i].Message, -1) + for _, match := range matches { + if len(match) >= 3 { + entry := ObjectIdEntry{ + Key: match[1], + Value: match[2], + } + logEntries[i].Duplicate = append(logEntries[i].Duplicate, entry) + } + } + } + } + // fmt.Println(logEntries) + + // Convert log entries to JSON + jsonData, err := json.Marshal(logEntries) + if err != nil { + log.Fatal(err) + } + + err = os.WriteFile(filePath, jsonData, 0644) + if err != nil { + log.Fatal(err) + } + + // Print the JSON data + // fmt.Println(string(jsonData)) +} diff --git a/work/mongo.json b/work/mongo.json new file mode 100644 index 0000000..e69de29 diff --git a/work/mongo.log b/work/mongo.log new file mode 100644 index 0000000..5f459aa --- /dev/null +++ b/work/mongo.log @@ -0,0 +1 @@ 
+[{"timestamp":"2023-09-09T13:41:38.212+0000","message":"connected to: mongodb://localhost:27017/"},{"timestamp":"2023-09-09T13:41:38.213+0000","message":"dropping: vinay.accounts"},{"timestamp":"2023-09-09T13:41:38.355+0000","message":"continuing through error: E11000 duplicate key error collection: vinay.accounts index: _id_ dup key: { _id: ObjectId('5ca4bbc7a2dd94ee58162a60') }","duplicate":[{"key":" { _id","value":"5ca4bbc7a2dd94ee58162a60"}]},{"timestamp":"2023-09-09T13:41:38.355+0000","message":"1746 document(s) imported successfully. 1 document(s) failed to import."},{"timestamp":"2023-09-09T13:41:39.492+0000","message":"connected to: mongodb://localhost:27017/"},{"timestamp":"2023-09-09T13:41:39.492+0000","message":"dropping: vinay.customers"},{"timestamp":"2023-09-09T13:41:39.611+0000","message":"continuing through error: E11000 duplicate key error collection: vinay.customers index: _id_ dup key: { _id: ObjectId('5ca4bbcea2dd94ee58162c59') }","duplicate":[{"key":" { _id","value":"5ca4bbcea2dd94ee58162c59"}]},{"timestamp":"2023-09-09T13:41:39.611+0000","message":"continuing through error: E11000 duplicate key error collection: vinay.customers index: _id_ dup key: { _id: ObjectId('5ca4bbcea2dd94ee58162c5a') }","duplicate":[{"key":" { _id","value":"5ca4bbcea2dd94ee58162c5a"}]},{"timestamp":"2023-09-09T13:41:39.611+0000","message":"continuing through error: E11000 duplicate key error collection: vinay.customers index: _id_ dup key: { _id: ObjectId('5ca4bbcea2dd94ee58162c5b') }","duplicate":[{"key":" { _id","value":"5ca4bbcea2dd94ee58162c5b"}]},{"timestamp":"2023-09-09T13:41:39.611+0000","message":"continuing through error: E11000 duplicate key error collection: vinay.customers index: _id_ dup key: { _id: ObjectId('5ca4bbcea2dd94ee58162c5c') }","duplicate":[{"key":" { _id","value":"5ca4bbcea2dd94ee58162c5c"}]},{"timestamp":"2023-09-09T13:41:39.611+0000","message":"continuing through error: E11000 duplicate key error collection: vinay.customers index: _id_ dup key: { _id: ObjectId('5ca4bbcea2dd94ee58162c5d') }","duplicate":[{"key":" { _id","value":"5ca4bbcea2dd94ee58162c5d"}]},{"timestamp":"2023-09-09T13:41:39.611+0000","message":"continuing through error: E11000 duplicate key error collection: vinay.customers index: _id_ dup key: { _id: ObjectId('5ca4bbcea2dd94ee58162c5e') }","duplicate":[{"key":" { _id","value":"5ca4bbcea2dd94ee58162c5e"}]},{"timestamp":"2023-09-09T13:41:39.611+0000","message":"500 document(s) imported successfully. 6 document(s) failed to import."},{"timestamp":"2023-09-09T13:41:40.938+0000","message":"connected to: mongodb://localhost:27017/"},{"timestamp":"2023-09-09T13:41:40.938+0000","message":"dropping: vinay.transactions"},{"timestamp":"2023-09-09T13:41:42.301+0000","message":"1746 document(s) imported successfully. 0 document(s) failed to import."}] \ No newline at end of file