[MongoDB-Hadoop]: Connectivity testing

Install Hadoop (CDH 4/5), Hive, Pig, Java, and MongoDB, and set the environment variables as below.
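
A quick sanity check that each component is on the PATH; the versions shown are simply the ones this walkthrough ran against:

[root@dbversity.com ~]# java -version     # 1.8.0_05 here
[root@dbversity.com ~]# hadoop version    # 2.3.0-cdh5.0.0 here
[root@dbversity.com ~]# hive --version    # 0.12.0-cdh5.0.0 here
[root@dbversity.com ~]# pig -version      # 0.12.0-cdh5.0.0 here
[root@dbversity.com ~]# mongo --version   # 2.6.5 here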

[root@dbversity.com ~]# cat ~/.bashrc | grep "export"

export PATH=$PATH:/opt/mongodb/bin
export JAVA_HOME=/usr/java/jdk1.8.0_05
export PATH=$JAVA_HOME/bin:$PATH

export HADOOP_HOME=/hadoop
export PATH=$HADOOP_HOME/bin:$PATH

export HIVE_HOME=/hadoop/hive-0.12.0-cdh5.0.0
export PATH=$HIVE_HOME/bin:$PATH

export PIG_HOME=/hadoop/pig-0.12.0-cdh5.0.0
export PATH=$PIG_HOME/bin:$PATH

[root@dbversity.com ~]
We also need the mongo-hadoop jars. To compile them from source, follow the documentation in the repository below:
https://github.com/mongodb/mongo-hadoop

The jars will be placed into build/libs for each module; e.g. for the core module, they are generated in the core/build/libs directory.
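
A minimal build sketch, assuming the repository's Gradle wrapper (the standard entry point for the mongo-hadoop build):

[root@dbversity.com ~]# git clone https://github.com/mongodb/mongo-hadoop.git
[root@dbversity.com ~]# cd mongo-hadoop
[root@dbversity.com mongo-hadoop]# ./gradlew jar   # jars land in <module>/build/libs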

• mongo-hadoop-core-1.3.0.jar
• mongo-hadoop-hive-1.3.0.jar
• mongo-hadoop-pig-1.3.0.jar
• flume-1.3.0.jar
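
(Note: the shell sessions below register 1.3.2 builds of these jars.) Hive also needs the connector jars, plus the MongoDB Java driver, on its classpath before the CREATE TABLE statements further down will work. A hedged sketch; the paths and driver version here are hypothetical:

hive> ADD JAR /root/mongo-hadoop/mongo-hadoop-core-1.3.2.jar;
hive> ADD JAR /root/mongo-hadoop/mongo-hadoop-hive-1.3.2.jar;
hive> ADD JAR /root/mongo-hadoop/mongo-java-driver-2.12.2.jar;
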
At MongoDB :-

Insert the following sample records into MongoDB:

db.dbversity_col.insert({"versity_id" : "154", "versity": "Oxford", "student" : "Srinivas", "technology" : "MongoDB", "gender" : "M" })
db.dbversity_col.insert({"versity_id" : "155", "versity": "AU", "student" : "John", "technology" : "CouchBase", "gender" : "M" })
db.dbversity_col.insert({"versity_id" : "156", "versity": "Oxford", "student" : "Pinky", "technology" : "MemSQL", "gender" : "F" })
db.dbversity_col.insert({"versity_id" : "157", "versity": "FC", "student" : "Steves", "technology" : "Redis", "gender" : "M" })
db.dbversity_col.insert({"versity_id" : "158", "versity": "Oxford", "student" : "Sweety", "technology" : "MySQl", "gender" : "F" })
db.dbversity_col.insert({"versity_id" : "159", "versity": "WB", "student" : "Williams", "technology" : "Oracle", "gender" : "M" })
db.dbversity_col.insert({"versity_id" : "160", "versity": "Oxford", "student" : "Lucy", "technology" : "Elastic", "gender" : "F" })
db.dbversity_col.insert({"versity_id" : "161", "versity": "XYZ", "student" : "Michelle", "technology" : "RDBMS", "gender" : "F" })
db.dbversity_col.insert({"versity_id" : "162", "versity": "Oxford", "student" : "Don", "technology" : "NoSQL", "gender" : "M" })
db.dbversity_col.insert({"versity_id" : "163", "versity": "BU", "student" : "Rosy", "technology" : "AuroraDB", "gender" : "F" })

[root@dbversity.com ~]# mongo
MongoDB shell version: 2.6.5
connecting to: test
>

> use dbversity_db
switched to db dbversity_db
>
>
> db.dbversity_col.insert({"versity_id" : "154", "versity": "Oxford", "student" : "Srinivas", "technology" : "MongoDB", "gender" : "M" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "155", "versity": "AU", "student" : "John", "technology" : "CouchBase", "gender" : "M" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "156", "versity": "Oxford", "student" : "Pinky", "technology" : "MemSQL", "gender" : "F" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "157", "versity": "FC", "student" : "Steves", "technology" : "Redis", "gender" : "M" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "158", "versity": "Oxford", "student" : "Sweety", "technology" : "MySQl", "gender" : "F" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "159", "versity": "WB", "student" : "Williams", "technology" : "Oracle", "gender" : "M" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "160", "versity": "Oxford", "student" : "Lucy", "technology" : "Elastic", "gender" : "F" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "161", "versity": "XYZ", "student" : "Michelle", "technology" : "RDBMS", "gender" : "F" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "162", "versity": "Oxford", "student" : "Don", "technology" : "NoSQL", "gender" : "M" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "163", "versity": "BU", "student" : "Rosy", "technology" : "AuroraDB", "gender" : "F" })
WriteResult({ "nInserted" : 1 })
>
> db.dbversity_col.find()
{ "_id" : ObjectId("552f56ebdc42c37802601d5d"), "versity_id" : "154", "versity" : "Oxford", "student" : "Srinivas", "technology" : "MongoDB", "gender" : "M" }
{ "_id" : ObjectId("552f56ebdc42c37802601d5e"), "versity_id" : "155", "versity" : "AU", "student" : "John", "technology" : "CouchBase", "gender" : "M" }
{ "_id" : ObjectId("552f56ebdc42c37802601d5f"), "versity_id" : "156", "versity" : "Oxford", "student" : "Pinky", "technology" : "MemSQL", "gender" : "F" }
{ "_id" : ObjectId("552f56ebdc42c37802601d60"), "versity_id" : "157", "versity" : "FC", "student" : "Steves", "technology" : "Redis", "gender" : "M" }
{ "_id" : ObjectId("552f56ebdc42c37802601d61"), "versity_id" : "158", "versity" : "Oxford", "student" : "Sweety", "technology" : "MySQl", "gender" : "F" }
{ "_id" : ObjectId("552f56ebdc42c37802601d62"), "versity_id" : "159", "versity" : "WB", "student" : "Williams", "technology" : "Oracle", "gender" : "M" }
{ "_id" : ObjectId("552f56ebdc42c37802601d63"), "versity_id" : "160", "versity" : "Oxford", "student" : "Lucy", "technology" : "Elastic", "gender" : "F" }
{ "_id" : ObjectId("552f56ebdc42c37802601d64"), "versity_id" : "161", "versity" : "XYZ", "student" : "Michelle", "technology" : "RDBMS", "gender" : "F" }
{ "_id" : ObjectId("552f56ebdc42c37802601d65"), "versity_id" : "162", "versity" : "Oxford", "student" : "Don", "technology" : "NoSQL", "gender" : "M" }
{ "_id" : ObjectId("552f56ebdc42c37802601d66"), "versity_id" : "163", "versity" : "BU", "student" : "Rosy", "technology" : "AuroraDB", "gender" : "F" }
>
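
A quick count confirms that all ten documents landed:

> db.dbversity_col.count()
10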

Accessing MongoDB directly from Hive :-

We create a Hive external table, specifying the fields and the storage-handler configuration:
[root@dbversity.com ~]# hive

Logging initialized using configuration in jar:file:/hadoop/hive-0.12.0-cdh5.0.0/lib/hive-common-0.12.0-cdh5.0.0.jar!/hive-log4j.properties
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/hadoop/hive-0.12.0-cdh5.0.0/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
hive>
>
> CREATE EXTERNAL TABLE dbversity_hive_tbl (
> id string,
> versity_id string,
> student string,
> technology string,
> gender string
> )
> STORED BY 'com.mongodb.hadoop.hive.MongoStorageHandler'
> WITH SERDEPROPERTIES('mongo.columns.mapping'='{"id":"_id", "versity_id":"versity_id", "student":"student", "technology":"technology", "gender":"gender"}')
> TBLPROPERTIES('mongo.uri'='mongodb://localhost:27017/dbversity_db.dbversity_col');
OK
Time taken: 2.39 seconds

• MongoStorageHandler establishes the MongoDB connection:
STORED BY 'com.mongodb.hadoop.hive.MongoStorageHandler'

• The SerDe properties provide the column mapping between the Hive table and the corresponding MongoDB collection. In each mapping pair, the Hive column name appears before the colon and the MongoDB field name after it:
WITH SERDEPROPERTIES('mongo.columns.mapping'='{"id":"_id", "versity_id":"versity_id", "student":"student", "technology":"technology", "gender":"gender"}')

• The MongoDB URI gives the server location and port (the default 27017 here), plus the database and collection. <dbversity_db> (before the dot) is the MongoDB database name and <dbversity_col> (after the dot) is the collection name within that database:
TBLPROPERTIES('mongo.uri'='mongodb://localhost:27017/dbversity_db.dbversity_col');
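
If your mongod requires authentication, the same property accepts credentials in the standard MongoDB URI form (user and password here are placeholders):

TBLPROPERTIES('mongo.uri'='mongodb://user:password@localhost:27017/dbversity_db.dbversity_col');
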
hive>
> show tables;
OK
dbversity_hive_tbl
Time taken: 0.027 seconds, Fetched: 1 row(s)
hive>
>
> select * from dbversity_hive_tbl;
OK
552f56ebdc42c37802601d5d 154 Srinivas MongoDB M
552f56ebdc42c37802601d5e 155 John CouchBase M
552f56ebdc42c37802601d5f 156 Pinky MemSQL F
552f56ebdc42c37802601d60 157 Steves Redis M
552f56ebdc42c37802601d61 158 Sweety MySQl F
552f56ebdc42c37802601d62 159 Williams Oracle M
552f56ebdc42c37802601d63 160 Lucy Elastic F
552f56ebdc42c37802601d64 161 Michelle RDBMS F
552f56ebdc42c37802601d65 162 Don NoSQL M
552f56ebdc42c37802601d66 163 Rosy AuroraDB F
Time taken: 0.547 seconds, Fetched: 10 row(s)
hive>
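
From here, ordinary HiveQL runs against the live collection. A small sketch using the mapped columns above:

hive> SELECT student, technology FROM dbversity_hive_tbl WHERE gender = 'F';
-- expect the five gender 'F' rows: Pinky, Sweety, Lucy, Michelle, Rosy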

Archiving MongoDB to Hive/HDFS

If you remove EXTERNAL from the statement above to create an internal (managed) Hive table, Hive will ingest the MongoDB data into its warehouse on HDFS.
hive>
>
> CREATE TABLE dbversity_hive_tbl_internal_new_1 (
> id string,
> versity_id string,
> student string,
> technology string,
> gender string
> )
> STORED BY 'com.mongodb.hadoop.hive.MongoStorageHandler'
> WITH SERDEPROPERTIES('mongo.columns.mapping'='{"id":"_id", "versity_id":"versity_id", "student":"student", "technology":"technology", "gender":"gender"}')
> TBLPROPERTIES('mongo.uri'='mongodb://localhost:27017/dbversity_db.dbversity_col_backup');
OK
Time taken: 0.217 seconds
hive>
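
If you want a copy that lives purely on HDFS, independent of the live collection, one hedged alternative is CREATE TABLE AS SELECT from the external table into a native Hive table (the table name here is hypothetical):

hive> CREATE TABLE dbversity_hive_tbl_hdfs_copy AS
    > SELECT * FROM dbversity_hive_tbl;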

At the Hadoop filesystem

[root@dbversity.com ~]# hadoop fs -ls /user/hive/warehouse/

drwxr-xr-x – root supergroup 0 2015-04-16 03:30 /user/hive/warehouse/dbversity_hive_tbl_internal
[root@dbversity.com ~]
Usage with Pig
To load records from a MongoDB database into a Pig script, the connector provides a class called MongoLoader. To use it, first register the dependency jars in your script and then pass the MongoDB URI to LOAD with the MongoLoader class.
Example in Pig
In this example, Pig loads data from MongoDB into HDFS and then writes it back from HDFS to MongoDB.

The connector jars need to be registered to establish the MongoDB connection in applications like Pig:
REGISTER /root/mongo-hadoop/mongo-hadoop-core-1.3.2.jar
REGISTER /root/mongo-hadoop/mongo-hadoop-pig-1.3.2.jar
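
Besides the dynamic-schema mode used in the session below, MongoLoader can take an explicit schema plus an alias for _id. A hedged sketch of that fixed-schema form, assuming the (schema, idAlias) argument order described in the connector's Pig docs:

grunt> typed_data = LOAD 'mongodb://localhost:27017/dbversity_db.dbversity_col'
USING com.mongodb.hadoop.pig.MongoLoader('id:chararray, versity_id:chararray, student:chararray, technology:chararray, gender:chararray', 'id');
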
[root@dbversity.com /hadoop/cloudera/patches]# pig

2015-04-16 03:42:31,506 [main] INFO org.apache.pig.Main – Apache Pig version 0.12.0-cdh5.0.0 (rexported) compiled Mar 27 2014, 23:18:40
2015-04-16 03:42:31,507 [main] INFO org.apache.pig.Main – Logging error messages to: /hadoop/cloudera/patches/pig_1429170151504.log
2015-04-16 03:42:31,524 [main] INFO org.apache.pig.impl.util.Utils – Default bootup file /root/.pigbootup not found
2015-04-16 03:42:31,729 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
2015-04-16 03:42:31,729 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – fs.default.name is deprecated. Instead, use fs.defaultFS
2015-04-16 03:42:31,729 [main] INFO org.apache.pig.backend.hadoop.executionengine.HExecutionEngine – Connecting to hadoop file system at: hdfs://vm-5cc5-bad6:9000
2015-04-16 03:42:31,962 [main] WARN org.apache.hadoop.util.NativeCodeLoader – Unable to load native-hadoop library for your platform… using builtin-java classes where applicable
2015-04-16 03:42:32,408 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
2015-04-16 03:42:32,408 [main] INFO org.apache.pig.backend.hadoop.executionengine.HExecutionEngine – Connecting to map-reduce job tracker at: myhostname:9001
2015-04-16 03:42:32,409 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – fs.default.name is deprecated. Instead, use fs.defaultFS
grunt>
grunt> REGISTER /root/mongo-hadoop/mongo-hadoop-core-1.3.2.jar
grunt> REGISTER /root/mongo-hadoop/mongo-hadoop-pig-1.3.2.jar
grunt>
• Load the data from MongoDB, giving MongoLoader the database location, database name, and collection name. Before loading, seed the collection; the dbversity_db on this instance starts out empty, so we insert the sample records again:
[root@dbversity.com ~]# mongo localhost:27017/dbversity_db
MongoDB shell version: 2.6.5
connecting to: localhost:27017/dbversity_db
>
> show collections
system.indexes
>
> db.dbversity_col.insert({"versity_id" : "154", "versity": "Oxford", "student" : "Srinivas", "technology" : "MongoDB", "gender" : "M" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "155", "versity": "AU", "student" : "John", "technology" : "CouchBase", "gender" : "M" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "156", "versity": "Oxford", "student" : "Pinky", "technology" : "MemSQL", "gender" : "F" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "157", "versity": "FC", "student" : "Steves", "technology" : "Redis", "gender" : "M" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "158", "versity": "Oxford", "student" : "Sweety", "technology" : "MySQl", "gender" : "F" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "159", "versity": "WB", "student" : "Williams", "technology" : "Oracle", "gender" : "M" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "160", "versity": "Oxford", "student" : "Lucy", "technology" : "Elastic", "gender" : "F" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "161", "versity": "XYZ", "student" : "Michelle", "technology" : "RDBMS", "gender" : "F" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "162", "versity": "Oxford", "student" : "Don", "technology" : "NoSQL", "gender" : "M" })
WriteResult({ "nInserted" : 1 })
> db.dbversity_col.insert({"versity_id" : "163", "versity": "BU", "student" : "Rosy", "technology" : "AuroraDB", "gender" : "F" })
WriteResult({ "nInserted" : 1 })
>
>
> db.dbversity_col.find()
{ "_id" : ObjectId("552f6a0587b6e0809edf7f9e"), "versity_id" : "154", "versity" : "Oxford", "student" : "Srinivas", "technology" : "MongoDB", "gender" : "M" }
{ "_id" : ObjectId("552f6a0587b6e0809edf7f9f"), "versity_id" : "155", "versity" : "AU", "student" : "John", "technology" : "CouchBase", "gender" : "M" }
{ "_id" : ObjectId("552f6a0587b6e0809edf7fa0"), "versity_id" : "156", "versity" : "Oxford", "student" : "Pinky", "technology" : "MemSQL", "gender" : "F" }
{ "_id" : ObjectId("552f6a0587b6e0809edf7fa1"), "versity_id" : "157", "versity" : "FC", "student" : "Steves", "technology" : "Redis", "gender" : "M" }
{ "_id" : ObjectId("552f6a0587b6e0809edf7fa2"), "versity_id" : "158", "versity" : "Oxford", "student" : "Sweety", "technology" : "MySQl", "gender" : "F" }
{ "_id" : ObjectId("552f6a0587b6e0809edf7fa3"), "versity_id" : "159", "versity" : "WB", "student" : "Williams", "technology" : "Oracle", "gender" : "M" }
{ "_id" : ObjectId("552f6a0587b6e0809edf7fa4"), "versity_id" : "160", "versity" : "Oxford", "student" : "Lucy", "technology" : "Elastic", "gender" : "F" }
{ "_id" : ObjectId("552f6a0587b6e0809edf7fa5"), "versity_id" : "161", "versity" : "XYZ", "student" : "Michelle", "technology" : "RDBMS", "gender" : "F" }
{ "_id" : ObjectId("552f6a0587b6e0809edf7fa6"), "versity_id" : "162", "versity" : "Oxford", "student" : "Don", "technology" : "NoSQL", "gender" : "M" }
{ "_id" : ObjectId("552f6a0687b6e0809edf7fa7"), "versity_id" : "163", "versity" : "BU", "student" : "Rosy", "technology" : "AuroraDB", "gender" : "F" }
>
At Pig :-

• Load the data from MongoDB with MongoLoader, giving the database location, database name, and collection name:

grunt> dev_data = LOAD 'mongodb://localhost:27017/dbversity_db.dbversity_col' USING com.mongodb.hadoop.pig.MongoLoader;
2015-04-16 03:52:29,311 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:29,315 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:29,317 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:29,326 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:29,330 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:29,332 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
grunt>
• Store the data into HDFS, specifying an output directory that must not already exist:
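
(Re-running the job against an existing directory will fail; a hedged pre-flight cleanup sketch:)

[root@dbversity.com ~]# hadoop fs -test -e /user/demo/mongo/dbversity_col && hadoop fs -rm -r /user/demo/mongo/dbversity_col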

grunt> STORE dev_data INTO '/user/demo/mongo/dbversity_col';
2015-04-16 03:52:40,017 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:40,018 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:40,019 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:40,025 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:40,027 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:40,028 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:40,030 [main] INFO org.apache.pig.tools.pigstats.ScriptState – Pig features used in the script: UNKNOWN
2015-04-16 03:52:40,030 [main] INFO org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer – {RULES_ENABLED=[AddForEach, ColumnMapKeyPrune, DuplicateForEachColumnRewrite, GroupByConstParallelSetter, ImplicitSplitInserter, LimitOptimizer, LoadTypeCastInserter, MergeFilter, MergeForEach, NewPartitionFilterOptimizer, PartitionFilterOptimizer, PushDownForEachFlatten, PushUpFilter, SplitFilter, StreamTypeCastInserter], RULES_DISABLED=[FilterLogicExpressionSimplifier]}
2015-04-16 03:52:40,033 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler – File concatenation threshold: 100 optimistic? false
2015-04-16 03:52:40,034 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer – MR plan size before optimization: 1
2015-04-16 03:52:40,034 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer – MR plan size after optimization: 1
2015-04-16 03:52:40,042 [main] INFO org.apache.hadoop.metrics.jvm.JvmMetrics – Cannot initialize JVM Metrics with processName=JobTracker, sessionId= – already initialized
2015-04-16 03:52:40,042 [main] INFO org.apache.pig.tools.pigstats.ScriptState – Pig script settings are added to the job
2015-04-16 03:52:40,047 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – mapred.job.reduce.markreset.buffer.percent is not set, set to default 0.3
2015-04-16 03:52:40,103 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – creating jar file Job5699432224075199242.jar
2015-04-16 03:52:43,481 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – jar file Job5699432224075199242.jar created
2015-04-16 03:52:43,492 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – Setting up single store job
2015-04-16 03:52:43,498 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – 1 map-reduce job(s) waiting for submission.
2015-04-16 03:52:43,504 [JobControl] INFO org.apache.hadoop.metrics.jvm.JvmMetrics – Cannot initialize JVM Metrics with processName=JobTracker, sessionId= – already initialized
2015-04-16 03:52:43,615 [JobControl] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:43,637 [JobControl] INFO com.mongodb.hadoop.splitter.StandaloneMongoSplitter – Running splitvector to check splits against mongodb://localhost:27017/dbversity_db.dbversity_col
2015-04-16 03:52:43,643 [JobControl] WARN com.mongodb.hadoop.splitter.StandaloneMongoSplitter – WARNING: No Input Splits were calculated by the split code. Proceeding with a *single* split. Data may be too small, try lowering ‘mongo.input.split_size’ if this is undesirable.
2015-04-16 03:52:43,643 [JobControl] INFO com.mongodb.hadoop.splitter.MongoCollectionSplitter – Created split: min=null, max= null
2015-04-16 03:52:43,646 [JobControl] INFO org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil – Total input paths (combined) to process : 1
2015-04-16 03:52:43,686 [JobControl] INFO org.apache.hadoop.mapreduce.JobSubmitter – number of splits:1
2015-04-16 03:52:43,807 [JobControl] INFO org.apache.hadoop.mapreduce.JobSubmitter – Submitting tokens for job: job_local1081547093_0003
2015-04-16 03:52:43,835 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/staging/root1081547093/.staging/job_local1081547093_0003/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
2015-04-16 03:52:43,838 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/staging/root1081547093/.staging/job_local1081547093_0003/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
2015-04-16 03:52:44,259 [JobControl] INFO org.apache.hadoop.mapred.LocalDistributedCacheManager – Creating symlink: /tmp/hadoop-root/mapred/local/1429170763887/mongo-hadoop-core-1.3.2.jar <- /hadoop/cloudera/patches/mongo-hadoop-core-1.3.2.jar
2015-04-16 03:52:44,293 [JobControl] INFO org.apache.hadoop.mapred.LocalDistributedCacheManager – Localized hdfs://vm-5cc5-bad6:9000/tmp/temp-517300918/tmp1329905624/mongo-hadoop-core-1.3.2.jar as file:/tmp/hadoop-root/mapred/local/1429170763887/mongo-hadoop-core-1.3.2.jar
2015-04-16 03:52:44,314 [JobControl] INFO org.apache.hadoop.mapred.LocalDistributedCacheManager – Creating symlink: /tmp/hadoop-root/mapred/local/1429170763888/mongo-hadoop-pig-1.3.2.jar <- /hadoop/cloudera/patches/mongo-hadoop-pig-1.3.2.jar
2015-04-16 03:52:44,319 [JobControl] INFO org.apache.hadoop.mapred.LocalDistributedCacheManager – Localized hdfs://vm-5cc5-bad6:9000/tmp/temp-517300918/tmp1846727989/mongo-hadoop-pig-1.3.2.jar as file:/tmp/hadoop-root/mapred/local/1429170763888/mongo-hadoop-pig-1.3.2.jar
2015-04-16 03:52:44,356 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1081547093_0003/job_local1081547093_0003.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
2015-04-16 03:52:44,359 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1081547093_0003/job_local1081547093_0003.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
2015-04-16 03:52:44,359 [JobControl] INFO org.apache.hadoop.mapred.LocalDistributedCacheManager – file:/tmp/hadoop-root/mapred/local/1429170763887/mongo-hadoop-core-1.3.2.jar
2015-04-16 03:52:44,359 [JobControl] INFO org.apache.hadoop.mapred.LocalDistributedCacheManager – file:/tmp/hadoop-root/mapred/local/1429170763888/mongo-hadoop-pig-1.3.2.jar
2015-04-16 03:52:44,365 [JobControl] INFO org.apache.hadoop.mapreduce.Job – The url to track the job: http://localhost:8080/
2015-04-16 03:52:44,365 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – HadoopJobId: job_local1081547093_0003
2015-04-16 03:52:44,365 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – Processing aliases dev_data
2015-04-16 03:52:44,365 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – detailed locations: M: dev_data[5,11] C: R:
2015-04-16 03:52:44,365 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – More information at: http://myhostname:50030/jobdetails.jsp?jobid=job_local1081547093_0003
2015-04-16 03:52:44,367 [Thread-50] INFO org.apache.hadoop.mapred.LocalJobRunner – OutputCommitter set in config null
2015-04-16 03:52:44,369 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – 0% complete
2015-04-16 03:52:44,383 [Thread-50] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.textoutputformat.separator is deprecated. Instead, use mapreduce.output.textoutputformat.separator
2015-04-16 03:52:44,384 [Thread-50] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
2015-04-16 03:52:44,384 [Thread-50] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.reduce.markreset.buffer.percent is deprecated. Instead, use mapreduce.reduce.markreset.buffer.percent
2015-04-16 03:52:44,385 [Thread-50] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.jar is deprecated. Instead, use mapreduce.job.jar
2015-04-16 03:52:44,385 [Thread-50] INFO org.apache.hadoop.conf.Configuration.deprecation – fs.default.name is deprecated. Instead, use fs.defaultFS
2015-04-16 03:52:44,389 [Thread-50] INFO org.apache.hadoop.mapred.LocalJobRunner – OutputCommitter is org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter
2015-04-16 03:52:44,451 [Thread-50] INFO org.apache.hadoop.mapred.LocalJobRunner – Waiting for map tasks
2015-04-16 03:52:44,452 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner – Starting task: attempt_local1081547093_0003_m_000000_0
2015-04-16 03:52:44,511 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Using ResourceCalculatorProcessTree : [ ]
2015-04-16 03:52:44,516 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask – Processing split: Number of splits :1
Total Length = 9223372036854775807
Input split[0]:
Length = 9223372036854775807
Locations:
localhost:27017

———————–

2015-04-16 03:52:44,521 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.pig.MongoStorage – Initializing MongoLoader in dynamic schema mode.
2015-04-16 03:52:44,532 [LocalJobRunner Map Task Executor #0] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigRecordReader – Current split being processed MongoInputSplit{URI=mongodb://localhost:27017/dbversity_db.dbversity_col, authURI=null, min={ }, max={ }, query={ }, sort={ }, fields={ }, notimeout=false}
2015-04-16 03:52:44,569 [LocalJobRunner Map Task Executor #0] INFO org.apache.pig.data.SchemaTupleBackend – Key [pig.schematuple] was not set… will not generate code.
2015-04-16 03:52:44,573 [LocalJobRunner Map Task Executor #0] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map – Aliases being processed per job phase (AliasName[line,offset]): M: dev_data[5,11] C: R:
2015-04-16 03:52:44,594 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.input.MongoRecordReader – Read 10.0 documents from:
2015-04-16 03:52:44,595 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.input.MongoRecordReader – MongoInputSplit{URI=mongodb://localhost:27017/dbversity_db.dbversity_col, authURI=null, min={ }, max={ }, query={ }, sort={ }, fields={ }, notimeout=false}
2015-04-16 03:52:44,599 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner –
2015-04-16 03:52:44,615 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Task:attempt_local1081547093_0003_m_000000_0 is done. And is in the process of committing
2015-04-16 03:52:44,633 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner –
2015-04-16 03:52:44,633 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Task attempt_local1081547093_0003_m_000000_0 is allowed to commit now
2015-04-16 03:52:44,645 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter – Saved output of task ‘attempt_local1081547093_0003_m_000000_0’ to hdfs://vm-5cc5-bad6:9000/user/demo/mongo/dbversity_col/_temporary/0/task_local1081547093_0003_m_000000
2015-04-16 03:52:44,646 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner – map
2015-04-16 03:52:44,646 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Task ‘attempt_local1081547093_0003_m_000000_0’ done.
2015-04-16 03:52:44,646 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner – Finishing task: attempt_local1081547093_0003_m_000000_0
2015-04-16 03:52:44,646 [Thread-50] INFO org.apache.hadoop.mapred.LocalJobRunner – map task executor complete.
2015-04-16 03:52:49,383 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
2015-04-16 03:52:49,383 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
2015-04-16 03:52:49,384 [main] WARN org.apache.pig.tools.pigstats.PigStatsUtil – Failed to get RunningJob for job job_local1081547093_0003
2015-04-16 03:52:49,384 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – 100% complete
2015-04-16 03:52:49,387 [main] INFO org.apache.pig.tools.pigstats.SimplePigStats – Script Statistics:

HadoopVersion PigVersion UserId StartedAt FinishedAt Features
2.3.0-cdh5.0.0 0.12.0-cdh5.0.0 root 2015-04-16 03:52:40 2015-04-16 03:52:49 UNKNOWN

Success!

Job Stats (time in seconds):
JobId Maps Reduces MaxMapTime MinMapTIme AvgMapTime MedianMapTime MaxReduceTime MinReduceTime AvgReduceTime MedianReducetime Alias Feature Outputs
job_local1081547093_0003 1 0 0 0 0 0 n/a n/a n/a n/a dev_data MAP_ONLY /user/demo/mongo/dbversity_col,

Input(s):
Successfully read 0 records from: “mongodb://localhost:27017/dbversity_db.dbversity_col”

Output(s):
Successfully stored 0 records in: “/user/demo/mongo/dbversity_col”

Counters:
Total records written : 0
Total bytes written : 0
Spillable Memory Manager spill count : 0
Total bags proactively spilled: 0
Total records proactively spilled: 0

Job DAG:
job_local1081547093_0003
2015-04-16 03:52:49,387 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – Success!
grunt>

• Check the results in HDFS:

[root@dbversity.com ~]# hadoop fs -ls /user/demo/mongo/dbversity_col
Found 2 items
-rw-r--r-- 3 root supergroup 0 2015-04-16 03:52 /user/demo/mongo/dbversity_col/_SUCCESS
-rw-r--r-- 3 root supergroup 1010 2015-04-16 03:52 /user/demo/mongo/dbversity_col/part-m-00000

[root@dbversity.com ~]
[root@dbversity.com ~]# hadoop fs -cat /user/demo/mongo/dbversity_col/part-m-00000

[versity_id#154,_id#552f6a0587b6e0809edf7f9e,versity#Oxford,technology#MongoDB,gender#M,student#Srinivas]
[versity_id#155,_id#552f6a0587b6e0809edf7f9f,versity#AU,technology#CouchBase,gender#M,student#John]
[versity_id#156,_id#552f6a0587b6e0809edf7fa0,versity#Oxford,technology#MemSQL,gender#F,student#Pinky]
[versity_id#157,_id#552f6a0587b6e0809edf7fa1,versity#FC,technology#Redis,gender#M,student#Steves]
[versity_id#158,_id#552f6a0587b6e0809edf7fa2,versity#Oxford,technology#MySQl,gender#F,student#Sweety]
[versity_id#159,_id#552f6a0587b6e0809edf7fa3,versity#WB,technology#Oracle,gender#M,student#Williams]
[versity_id#160,_id#552f6a0587b6e0809edf7fa4,versity#Oxford,technology#Elastic,gender#F,student#Lucy]
[versity_id#161,_id#552f6a0587b6e0809edf7fa5,versity#XYZ,technology#RDBMS,gender#F,student#Michelle]
[versity_id#162,_id#552f6a0587b6e0809edf7fa6,versity#Oxford,technology#NoSQL,gender#M,student#Don]
[versity_id#163,_id#552f6a0687b6e0809edf7fa7,versity#BU,technology#AuroraDB,gender#F,student#Rosy]
[root@dbversity.com ~]

• Retrieve the data back from HDFS to MongoDB:
[root@dbversity.com ~]# pig
2015-04-16 04:08:32,188 [main] INFO org.apache.pig.Main – Apache Pig version 0.12.0-cdh5.0.0 (rexported) compiled Mar 27 2014, 23:18:40
2015-04-16 04:08:32,189 [main] INFO org.apache.pig.Main – Logging error messages to: /root/pig_1429171712187.log
2015-04-16 04:08:32,207 [main] INFO org.apache.pig.impl.util.Utils – Default bootup file /root/.pigbootup not found
2015-04-16 04:08:32,408 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
2015-04-16 04:08:32,408 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – fs.default.name is deprecated. Instead, use fs.defaultFS
2015-04-16 04:08:32,408 [main] INFO org.apache.pig.backend.hadoop.executionengine.HExecutionEngine – Connecting to hadoop file system at: hdfs://vm-5cc5-bad6:9000
2015-04-16 04:08:32,640 [main] WARN org.apache.hadoop.util.NativeCodeLoader – Unable to load native-hadoop library for your platform… using builtin-java classes where applicable
2015-04-16 04:08:33,100 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
2015-04-16 04:08:33,100 [main] INFO org.apache.pig.backend.hadoop.executionengine.HExecutionEngine – Connecting to map-reduce job tracker at: myhostname:9001
2015-04-16 04:08:33,102 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – fs.default.name is deprecated. Instead, use fs.defaultFS
grunt>
grunt>
grunt> bkp_data = LOAD '/user/demo/mongo/dbversity_col/part-m-00000' USING PigStorage();
grunt>
grunt> dump bkp_data;
2015-04-16 04:10:00,544 [main] INFO org.apache.pig.tools.pigstats.ScriptState – Pig features used in the script: UNKNOWN
2015-04-16 04:10:00,577 [main] INFO org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer – {RULES_ENABLED=[AddForEach, ColumnMapKeyPrune, DuplicateForEachColumnRewrite, GroupByConstParallelSetter, ImplicitSplitInserter, LimitOptimizer, LoadTypeCastInserter, MergeFilter, MergeForEach, NewPartitionFilterOptimizer, PartitionFilterOptimizer, PushDownForEachFlatten, PushUpFilter, SplitFilter, StreamTypeCastInserter], RULES_DISABLED=[FilterLogicExpressionSimplifier]}
2015-04-16 04:10:00,640 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler – File concatenation threshold: 100 optimistic? false
2015-04-16 04:10:00,690 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer – MR plan size before optimization: 1
2015-04-16 04:10:00,690 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer – MR plan size after optimization: 1
2015-04-16 04:10:00,712 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – session.id is deprecated. Instead, use dfs.metrics.session-id
2015-04-16 04:10:00,713 [main] INFO org.apache.hadoop.metrics.jvm.JvmMetrics – Initializing JVM Metrics with processName=JobTracker, sessionId=
2015-04-16 04:10:00,733 [main] INFO org.apache.pig.tools.pigstats.ScriptState – Pig script settings are added to the job
2015-04-16 04:10:00,830 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.reduce.markreset.buffer.percent is deprecated. Instead, use mapreduce.reduce.markreset.buffer.percent
2015-04-16 04:10:00,831 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – mapred.job.reduce.markreset.buffer.percent is not set, set to default 0.3
2015-04-16 04:10:00,831 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.output.compress is deprecated. Instead, use mapreduce.output.fileoutputformat.compress
2015-04-16 04:10:00,833 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – creating jar file Job2863618857501032894.jar
2015-04-16 04:10:04,364 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – jar file Job2863618857501032894.jar created
2015-04-16 04:10:04,364 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.jar is deprecated. Instead, use mapreduce.job.jar
2015-04-16 04:10:04,395 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – Setting up single store job
2015-04-16 04:10:04,420 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – 1 map-reduce job(s) waiting for submission.
2015-04-16 04:10:04,421 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.tracker.http.address is deprecated. Instead, use mapreduce.jobtracker.http.address
2015-04-16 04:10:04,433 [JobControl] INFO org.apache.hadoop.metrics.jvm.JvmMetrics – Cannot initialize JVM Metrics with processName=JobTracker, sessionId= – already initialized
2015-04-16 04:10:04,874 [JobControl] INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat – Total input paths to process : 1
2015-04-16 04:10:04,874 [JobControl] INFO org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil – Total input paths to process : 1
2015-04-16 04:10:04,921 [JobControl] INFO org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil – Total input paths (combined) to process : 1
2015-04-16 04:10:04,969 [JobControl] INFO org.apache.hadoop.mapreduce.JobSubmitter – number of splits:1
2015-04-16 04:10:05,091 [JobControl] INFO org.apache.hadoop.mapreduce.JobSubmitter – Submitting tokens for job: job_local159269264_0001
2015-04-16 04:10:05,135 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/staging/root159269264/.staging/job_local159269264_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
2015-04-16 04:10:05,139 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/staging/root159269264/.staging/job_local159269264_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
2015-04-16 04:10:05,238 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local159269264_0001/job_local159269264_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
2015-04-16 04:10:05,242 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local159269264_0001/job_local159269264_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
2015-04-16 04:10:05,248 [JobControl] INFO org.apache.hadoop.mapreduce.Job – The url to track the job: http://localhost:8080/
2015-04-16 04:10:05,251 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – HadoopJobId: job_local159269264_0001
2015-04-16 04:10:05,251 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – Processing aliases bkp_data
2015-04-16 04:10:05,251 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – detailed locations: M: bkp_data[1,11] C: R:
2015-04-16 04:10:05,251 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – More information at: http://myhostname:50030/jobdetails.jsp?jobid=job_local159269264_0001
2015-04-16 04:10:05,254 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – 0% complete
2015-04-16 04:10:05,257 [Thread-17] INFO org.apache.hadoop.mapred.LocalJobRunner – OutputCommitter set in config null
2015-04-16 04:10:05,273 [Thread-17] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
2015-04-16 04:10:05,273 [Thread-17] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.reduce.markreset.buffer.percent is deprecated. Instead, use mapreduce.reduce.markreset.buffer.percent
2015-04-16 04:10:05,273 [Thread-17] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.jar is deprecated. Instead, use mapreduce.job.jar
2015-04-16 04:10:05,274 [Thread-17] INFO org.apache.hadoop.conf.Configuration.deprecation – fs.default.name is deprecated. Instead, use fs.defaultFS
2015-04-16 04:10:05,278 [Thread-17] INFO org.apache.hadoop.mapred.LocalJobRunner – OutputCommitter is org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter
2015-04-16 04:10:05,355 [Thread-17] INFO org.apache.hadoop.mapred.LocalJobRunner – Waiting for map tasks
2015-04-16 04:10:05,356 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner – Starting task: attempt_local159269264_0001_m_000000_0
2015-04-16 04:10:05,419 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Using ResourceCalculatorProcessTree : [ ]
2015-04-16 04:10:05,427 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask – Processing split: Number of splits :1
Total Length = 1010
Input split[0]:
Length = 1010
Locations:

———————–

2015-04-16 04:10:05,441 [LocalJobRunner Map Task Executor #0] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigRecordReader – Current split being processed hdfs://vm-5cc5-bad6:9000/user/demo/mongo/dbversity_col/part-m-00000:0+1010
2015-04-16 04:10:05,519 [LocalJobRunner Map Task Executor #0] INFO org.apache.pig.data.SchemaTupleBackend – Key [pig.schematuple] was not set… will not generate code.
2015-04-16 04:10:05,526 [LocalJobRunner Map Task Executor #0] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map – Aliases being processed per job phase (AliasName[line,offset]): M: bkp_data[1,11] C: R:
2015-04-16 04:10:05,604 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner –
2015-04-16 04:10:05,729 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Task:attempt_local159269264_0001_m_000000_0 is done. And is in the process of committing
2015-04-16 04:10:05,742 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner –
2015-04-16 04:10:05,742 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Task attempt_local159269264_0001_m_000000_0 is allowed to commit now
2015-04-16 04:10:05,754 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter – Saved output of task ‘attempt_local159269264_0001_m_000000_0’ to hdfs://vm-5cc5-bad6:9000/tmp/temp1317874299/tmp-848760427/_temporary/0/task_local159269264_0001_m_000000
2015-04-16 04:10:05,758 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner – map
2015-04-16 04:10:05,759 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Task ‘attempt_local159269264_0001_m_000000_0’ done.
2015-04-16 04:10:05,759 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner – Finishing task: attempt_local159269264_0001_m_000000_0
2015-04-16 04:10:05,759 [Thread-17] INFO org.apache.hadoop.mapred.LocalJobRunner – map task executor complete.
2015-04-16 04:10:10,268 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
2015-04-16 04:10:10,268 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
2015-04-16 04:10:10,269 [main] WARN org.apache.pig.tools.pigstats.PigStatsUtil – Failed to get RunningJob for job job_local159269264_0001
2015-04-16 04:10:10,274 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – 100% complete
2015-04-16 04:10:10,277 [main] INFO org.apache.pig.tools.pigstats.SimplePigStats – Script Statistics:

HadoopVersion PigVersion UserId StartedAt FinishedAt Features
2.3.0-cdh5.0.0 0.12.0-cdh5.0.0 root 2015-04-16 04:10:00 2015-04-16 04:10:10 UNKNOWN

Success!

Job Stats (time in seconds):
JobId Maps Reduces MaxMapTime MinMapTIme AvgMapTime MedianMapTime MaxReduceTime MinReduceTime AvgReduceTime MedianReducetime Alias Feature Outputs
job_local159269264_0001 1 0 0 0 0 0 n/a n/a n/a n/a bkp_data MAP_ONLY hdfs://vm-5cc5-bad6:9000/tmp/temp1317874299/tmp-848760427,

Input(s):
Successfully read 0 records from: “/user/demo/mongo/dbversity_col/part-m-00000”

Output(s):
Successfully stored 0 records in: “hdfs://vm-5cc5-bad6:9000/tmp/temp1317874299/tmp-848760427”

Counters:
Total records written : 0
Total bytes written : 0
Spillable Memory Manager spill count : 0
Total bags proactively spilled: 0
Total records proactively spilled: 0

Job DAG:
job_local159269264_0001
2015-04-16 04:10:10,277 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – Success!
2015-04-16 04:10:10,279 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – fs.default.name is deprecated. Instead, use fs.defaultFS
2015-04-16 04:10:10,279 [main] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
2015-04-16 04:10:10,280 [main] WARN org.apache.pig.data.SchemaTupleBackend – SchemaTupleBackend has already been initialized
2015-04-16 04:10:10,287 [main] INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat – Total input paths to process : 1
2015-04-16 04:10:10,287 [main] INFO org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil – Total input paths to process : 1
([versity_id#154,_id#552f6a0587b6e0809edf7f9e,versity#Oxford,technology#MongoDB,gender#M,student#Srinivas])
([versity_id#155,_id#552f6a0587b6e0809edf7f9f,versity#AU,technology#CouchBase,gender#M,student#John])
([versity_id#156,_id#552f6a0587b6e0809edf7fa0,versity#Oxford,technology#MemSQL,gender#F,student#Pinky])
([versity_id#157,_id#552f6a0587b6e0809edf7fa1,versity#FC,technology#Redis,gender#M,student#Steves])
([versity_id#158,_id#552f6a0587b6e0809edf7fa2,versity#Oxford,technology#MySQl,gender#F,student#Sweety])
([versity_id#159,_id#552f6a0587b6e0809edf7fa3,versity#WB,technology#Oracle,gender#M,student#Williams])
([versity_id#160,_id#552f6a0587b6e0809edf7fa4,versity#Oxford,technology#Elastic,gender#F,student#Lucy])
([versity_id#161,_id#552f6a0587b6e0809edf7fa5,versity#XYZ,technology#RDBMS,gender#F,student#Michelle])
([versity_id#162,_id#552f6a0587b6e0809edf7fa6,versity#Oxford,technology#NoSQL,gender#M,student#Don])
([versity_id#163,_id#552f6a0687b6e0809edf7fa7,versity#BU,technology#AuroraDB,gender#F,student#Rosy])
grunt>

At MongoDB

>
> db.createCollection("dbversity_col_backup");
{ "ok" : 1 }
>
Back at Pig, project the fields out of each map and store them into the backup collection:

grunt> dbversity_db = FOREACH bkp_data GENERATE $0#'versity_id' AS versity_id, $0#'technology' AS technology, $0#'student' AS student, $0#'gender' AS gender;
grunt>
grunt>
grunt> STORE dbversity_db INTO 'mongodb://localhost:27017/dbversity_db.dbversity_col_backup' USING com.mongodb.hadoop.pig.MongoInsertStorage;
2015-04-16 04:13:21,516 [main] WARN org.apache.pig.PigServer – Encountered Warning IMPLICIT_CAST_TO_MAP 4 time(s).
2015-04-16 04:13:21,536 [main] WARN org.apache.pig.PigServer – Encountered Warning IMPLICIT_CAST_TO_MAP 4 time(s).
2015-04-16 04:13:21,537 [main] INFO org.apache.pig.tools.pigstats.ScriptState – Pig features used in the script: UNKNOWN
2015-04-16 04:13:21,539 [main] INFO org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer – {RULES_ENABLED=[AddForEach, ColumnMapKeyPrune, DuplicateForEachColumnRewrite, GroupByConstParallelSetter, ImplicitSplitInserter, LimitOptimizer, LoadTypeCastInserter, MergeFilter, MergeForEach, NewPartitionFilterOptimizer, PartitionFilterOptimizer, PushDownForEachFlatten, PushUpFilter, SplitFilter, StreamTypeCastInserter], RULES_DISABLED=[FilterLogicExpressionSimplifier]}
2015-04-16 04:13:21,545 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:21,584 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler – File concatenation threshold: 100 optimistic? false
2015-04-16 04:13:21,586 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer – MR plan size before optimization: 1
2015-04-16 04:13:21,586 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MultiQueryOptimizer – MR plan size after optimization: 1
2015-04-16 04:13:21,588 [main] INFO org.apache.hadoop.metrics.jvm.JvmMetrics – Cannot initialize JVM Metrics with processName=JobTracker, sessionId= – already initialized
2015-04-16 04:13:21,588 [main] INFO org.apache.pig.tools.pigstats.ScriptState – Pig script settings are added to the job
2015-04-16 04:13:21,610 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – mapred.job.reduce.markreset.buffer.percent is not set, set to default 0.3
2015-04-16 04:13:21,611 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – creating jar file Job1299826134286018992.jar
2015-04-16 04:13:25,372 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – jar file Job1299826134286018992.jar created
2015-04-16 04:13:25,381 [main] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:25,381 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler – Setting up single store job
2015-04-16 04:13:25,386 [main] INFO org.apache.pig.data.SchemaTupleFrontend – Key [pig.schematuple] is false, will not generate code.
2015-04-16 04:13:25,387 [main] INFO org.apache.pig.data.SchemaTupleFrontend – Starting process to move generated code to distributed cache
2015-04-16 04:13:25,387 [main] INFO org.apache.pig.data.SchemaTupleFrontend – Setting key [pig.schematuple.classes] with classes to deserialize []
2015-04-16 04:13:25,414 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – 1 map-reduce job(s) waiting for submission.
2015-04-16 04:13:25,416 [JobControl] INFO org.apache.hadoop.metrics.jvm.JvmMetrics – Cannot initialize JVM Metrics with processName=JobTracker, sessionId= – already initialized
2015-04-16 04:13:25,421 [JobControl] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:25,507 [JobControl] INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat – Total input paths to process : 1
2015-04-16 04:13:25,507 [JobControl] INFO org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil – Total input paths to process : 1
2015-04-16 04:13:25,509 [JobControl] INFO org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil – Total input paths (combined) to process : 1
2015-04-16 04:13:25,544 [JobControl] INFO org.apache.hadoop.mapreduce.JobSubmitter – number of splits:1
2015-04-16 04:13:25,572 [JobControl] INFO org.apache.hadoop.mapreduce.JobSubmitter – Submitting tokens for job: job_local1045407920_0002
2015-04-16 04:13:25,591 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/staging/root1045407920/.staging/job_local1045407920_0002/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
2015-04-16 04:13:25,593 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/staging/root1045407920/.staging/job_local1045407920_0002/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
2015-04-16 04:13:25,677 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1045407920_0002/job_local1045407920_0002.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
2015-04-16 04:13:25,679 [JobControl] WARN org.apache.hadoop.conf.Configuration – file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1045407920_0002/job_local1045407920_0002.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
2015-04-16 04:13:25,680 [JobControl] INFO org.apache.hadoop.mapreduce.Job – The url to track the job: http://localhost:8080/
2015-04-16 04:13:25,683 [Thread-39] INFO org.apache.hadoop.mapred.LocalJobRunner – OutputCommitter set in config null
2015-04-16 04:13:25,693 [Thread-39] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:25,694 [Thread-39] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.tracker is deprecated. Instead, use mapreduce.jobtracker.address
2015-04-16 04:13:25,694 [Thread-39] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.job.reduce.markreset.buffer.percent is deprecated. Instead, use mapreduce.reduce.markreset.buffer.percent
2015-04-16 04:13:25,695 [Thread-39] INFO org.apache.hadoop.conf.Configuration.deprecation – mapred.jar is deprecated. Instead, use mapreduce.job.jar
2015-04-16 04:13:25,695 [Thread-39] INFO org.apache.hadoop.conf.Configuration.deprecation – fs.default.name is deprecated. Instead, use fs.defaultFS
2015-04-16 04:13:25,696 [Thread-39] INFO org.apache.hadoop.mapred.LocalJobRunner – OutputCommitter is org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter
2015-04-16 04:13:25,697 [Thread-39] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1045407920_0002/job_local1045407920_0002.xml For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:25,698 [Thread-39] INFO com.mongodb.hadoop.output.MongoOutputCommitter – Setting up job.
2015-04-16 04:13:25,699 [Thread-39] INFO org.apache.hadoop.mapred.LocalJobRunner – Waiting for map tasks
2015-04-16 04:13:25,700 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner – Starting task: attempt_local1045407920_0002_m_000000_0
2015-04-16 04:13:25,708 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1045407920_0002/job_local1045407920_0002.xml For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:25,711 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1045407920_0002/job_local1045407920_0002.xml For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:25,712 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.output.MongoOutputCommitter – Setting up task.
2015-04-16 04:13:25,712 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Using ResourceCalculatorProcessTree : [ ]
2015-04-16 04:13:25,714 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.MapTask – Processing split: Number of splits :1
Total Length = 1010
Input split[0]:
Length = 1010
Locations:

———————–

2015-04-16 04:13:25,718 [LocalJobRunner Map Task Executor #0] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigRecordReader – Current split being processed hdfs://vm-5cc5-bad6:9000/user/demo/mongo/dbversity_col/part-m-00000:0+1010
2015-04-16 04:13:25,719 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1045407920_0002/job_local1045407920_0002.xml For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:25,915 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – HadoopJobId: job_local1045407920_0002
2015-04-16 04:13:25,915 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – Processing aliases bkp_data,dbversity_db
2015-04-16 04:13:25,915 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – detailed locations: M: bkp_data[1,11],dbversity_db[2,15] C: R:
2015-04-16 04:13:25,915 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – More information at: http://myhostname:50030/jobdetails.jsp?jobid=job_local1045407920_0002
2015-04-16 04:13:25,918 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – 0% complete
2015-04-16 04:13:25,946 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.pig.MongoStorage – GOT A SCHEMA versity_id:bytearray,technology:bytearray,student:bytearray,gender:bytearray versity_id:bytearray,technology:bytearray,student:bytearray,gender:bytearray
2015-04-16 04:13:25,961 [LocalJobRunner Map Task Executor #0] INFO org.apache.pig.data.SchemaTupleBackend – Key [pig.schematuple] was not set… will not generate code.
2015-04-16 04:13:25,974 [LocalJobRunner Map Task Executor #0] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map – Aliases being processed per job phase (AliasName[line,offset]): M: bkp_data[1,11],dbversity_db[2,15] C: R:
2015-04-16 04:13:26,027 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner –
2015-04-16 04:13:26,030 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Task:attempt_local1045407920_0002_m_000000_0 is done. And is in the process of committing
2015-04-16 04:13:26,033 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1045407920_0002/job_local1045407920_0002.xml For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:26,035 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner –
2015-04-16 04:13:26,035 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Task attempt_local1045407920_0002_m_000000_0 is allowed to commit now
2015-04-16 04:13:26,036 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1045407920_0002/job_local1045407920_0002.xml For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:26,037 [LocalJobRunner Map Task Executor #0] INFO com.mongodb.hadoop.output.MongoOutputCommitter – Committing task.
2015-04-16 04:13:26,038 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner – map
2015-04-16 04:13:26,038 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.Task – Task ‘attempt_local1045407920_0002_m_000000_0’ done.
2015-04-16 04:13:26,038 [LocalJobRunner Map Task Executor #0] INFO org.apache.hadoop.mapred.LocalJobRunner – Finishing task: attempt_local1045407920_0002_m_000000_0
2015-04-16 04:13:26,038 [Thread-39] INFO org.apache.hadoop.mapred.LocalJobRunner – map task executor complete.
2015-04-16 04:13:26,039 [Thread-39] INFO com.mongodb.hadoop.pig.MongoStorage – Store Location Config: Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, file:/tmp/hadoop-root/mapred/local/localRunner/root/job_local1045407920_0002/job_local1045407920_0002.xml For URI: mongodb://localhost:27017/dbversity_db.dbversity_col_backup
2015-04-16 04:13:30,929 [main] WARN org.apache.pig.tools.pigstats.PigStatsUtil – Failed to get RunningJob for job job_local1045407920_0002
2015-04-16 04:13:30,933 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – 100% complete
2015-04-16 04:13:30,933 [main] INFO org.apache.pig.tools.pigstats.SimplePigStats – Script Statistics:

HadoopVersion PigVersion UserId StartedAt FinishedAt Features
2.3.0-cdh5.0.0 0.12.0-cdh5.0.0 root 2015-04-16 04:13:21 2015-04-16 04:13:30 UNKNOWN

Success!

Job Stats (time in seconds):
JobId Maps Reduces MaxMapTime MinMapTIme AvgMapTime MedianMapTime MaxReduceTime MinReduceTime AvgReduceTime MedianReducetime Alias Feature Outputs
job_local1045407920_0002 1 0 0 0 0 0 n/a n/a n/a n/a bkp_data,dbversity_db MAP_ONLY mongodb://localhost:27017/dbversity_db.dbversity_col_backup,

Input(s):
Successfully read 0 records from: “/user/demo/mongo/dbversity_col/part-m-00000”

Output(s):
Successfully stored 0 records in: “mongodb://localhost:27017/dbversity_db.dbversity_col_backup”

Counters:
Total records written : 0
Total bytes written : 0
Spillable Memory Manager spill count : 0
Total bags proactively spilled: 0
Total records proactively spilled: 0

Job DAG:
job_local1045407920_0002
2015-04-16 04:13:30,934 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher – Success!
grunt>

At MongoDB

> db.dbversity_col_backup.find()
{ "_id" : ObjectId("552f6f26e4b03c263f07d746"), "versity_id" : "154", "technology" : "MongoDB", "student" : "Srinivas", "gender" : "M" }
{ "_id" : ObjectId("552f6f26e4b03c263f07d747"), "versity_id" : "155", "technology" : "CouchBase", "student" : "John", "gender" : "M" }
{ "_id" : ObjectId("552f6f26e4b03c263f07d748"), "versity_id" : "156", "technology" : "MemSQL", "student" : "Pinky", "gender" : "F" }
{ "_id" : ObjectId("552f6f26e4b03c263f07d749"), "versity_id" : "157", "technology" : "Redis", "student" : "Steves", "gender" : "M" }
{ "_id" : ObjectId("552f6f26e4b03c263f07d74a"), "versity_id" : "158", "technology" : "MySQl", "student" : "Sweety", "gender" : "F" }
{ "_id" : ObjectId("552f6f26e4b03c263f07d74b"), "versity_id" : "159", "technology" : "Oracle", "student" : "Williams", "gender" : "M" }
{ "_id" : ObjectId("552f6f26e4b03c263f07d74c"), "versity_id" : "160", "technology" : "Elastic", "student" : "Lucy", "gender" : "F" }
{ "_id" : ObjectId("552f6f26e4b03c263f07d74d"), "versity_id" : "161", "technology" : "RDBMS", "student" : "Michelle", "gender" : "F" }
{ "_id" : ObjectId("552f6f26e4b03c263f07d74e"), "versity_id" : "162", "technology" : "NoSQL", "student" : "Don", "gender" : "M" }
{ "_id" : ObjectId("552f6f26e4b03c263f07d74f"), "versity_id" : "163", "technology" : "AuroraDB", "student" : "Rosy", "gender" : "F" }
>
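
As a final check, the backup collection's document count should match the ten source documents:

> db.dbversity_col_backup.count()
10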
