tabs - Separator error when loading a CSV file into Hive -


i cherry!

 -- Register the CSV SerDe jar so Hive can resolve the SerDe class used below.
 add jar /home/hadoop/csv-serde.jar;

 create database if not exists test;
 use test;

 -- Neighbors table with 7 attributes, parsed by the CSV SerDe.
 -- The SerDe class name and the "separatorChar" property key are
 -- case-sensitive. The separator configured here is a TAB; it has to match
 -- the delimiter actually used in the data file, otherwise the whole row
 -- lands in the first column and the remaining columns come back null.
 create table if not exists test.neighbors (
     objid         bigint,
     neighborobjid bigint,
     distance      float,
     type          smallint,
     neighbortype  smallint,
     mode          tinyint,
     neighbormode  tinyint
 )
 row format serde 'com.bizo.hive.serde.csv.CSVSerde'
 with serdeproperties (
     "separatorChar" = "\t"
 )
 stored as textfile;

I create a table with 7 attributes, then load the file into HDFS.

# Copy the local neighbors.csv into the target HDFS directory.
hadoop fs -put /home/hadoop/hive/csv/neighbors.csv /usr/hadoop/src/test/org/apache/hadoop/hdfs 

Then I load the data into the neighbors table.

 use test;

 -- The SerDe jar must be registered in the session before the table is used.
 add jar /home/hadoop/csv-serde.jar;

 -- Move the file already stored in HDFS into the table, replacing any
 -- existing contents. No LOCAL keyword is used, so the path is an HDFS path.
 load data inpath '/usr/hadoop/src/test/org/apache/hadoop/hdfs/neighbors.csv'
 overwrite into table neighbors;

There should be 7 attributes.

select * from neighbors;
1237645876861272110  1237663542075064469  0.11240466522238592           6      3      2    2    null    null    null    null    null    null
1237645876861272110  1237663542075064470  0.21434291786709292           6      3      2    2    null    null    null    null    null    null
1237645876861272110  1237663542075064471  0.23417060042663973           6      6      2    2    null    null    null    null    null    null
1237645876861272110  1237666299449507934  1.2029018814611406e-3         6      6      2    2    null    null    null    null    null    null
1237645876861272110  1237666299449507935  0.11229523899396199           6      3      2    2    null    null    null    null    null    null
1237645876861272110  1237666299449507936  0.21438380604022358           6      3      2    2    null    null    null    null    null    null
1237645876861272110  1237666338097660047  1.7811731642581635e-3         6      6      2    2    null    null    null    null    null    null
1237645876861272110  1237666338097660048  0.11403807309757029           6      3      2    2    null    null    null    null    null    null

select objid from neighbors;
1237645876861272110  1237663542075064468  1.0369065814116625e-3         6      6      2    2
1237645876861272110  1237663542075064469  0.11240466522238592           6      3      2    2
1237645876861272110  1237663542075064470  0.21434291786709292           6      3      2    2
1237645876861272110  1237663542075064471  0.23417060042663973           6      6      2    2
1237645876861272110  1237666299449507934  1.2029018814611406e-3         6      6      2    2
1237645876861272110  1237666299449507935  0.11229523899396199           6      3      2    2
1237645876861272110  1237666299449507936  0.21438380604022358           6      3      2    2
1237645876861272110  1237666338097660047  1.7811731642581635e-3         6      6      2    2
1237645876861272110  1237666338097660048  0.11403807309757029           6      3      2    2

Only 1 attribute has values; the others are null.
Here is neighbors.csv. The CSV file is big (100 GB), and all 7 attributes end up in 1 column. I take a 92 KB sample as an example.

1237645876861272071 1237645876861272078 0.32965707824169882 3 3 2 2 1237645876861272071 1237649806768144454 0.26459062942243383 3 3 2 2 1237645876861272071 1237649806768144458 0.39759983562125262 3 6 2 2 1237645876861272071 1237649806768144467 0.42354716397014541 3 6 2 2 1237645876861272071 1237649806768144474 0.34070753757945649 3 6 2 2 1237645876861272071 1237649909843296374 0.18245847995970663 3 3 2 2 1237645876861272071 1237649909843296375 5.4353586098331253e-3 3 3 2 2 1237645876861272071 1237649909843296380 0.38719702159199682 3 3 2 2 1237645876861272071 1237649909843296384 0.33965518492584618 3 6 2 2 1237645876861272071 1237649909843296393 0.43922183220216948 3 6 2 2 1237645876861272071 1237649909843296395 0.49000926139721906 3 3 2 2 1237645876861272071 1237649909843296409 0.44651142644641773 3 6 2 2 1237645876861272071 1237649909843296411 0.39216278873263422 3 3 2 2 1237645876861272071 1237649909843296417 0.31949073955943957 3 3 2 2 1237645876861272071 1237649909843296422 0.33711157914306916 3 3 2 2 1237645876861272071 1237649909843361812 0.26280159296107986 3 3 2 2 1237645876861272071 1237649909843361858 0.37196149776646037 3 3 2 2 1237645876861272071 1237649909843361862 0.49007799303090271 3 6 2 2 1237645876861272071 1237650009161334811 0.18281445894897566 3 6 2 2 1237645876861272071 1237650009161334813 7.8938692921539429e-3 3 3 2 2 1237645876861272071 1237650009161334885 0.43000492776202714 3 6 2 2 1237645876861272071 1237653011352453152 0.18222425001383408 3 3 2 2 1237645876861272071 1237653011352453154 0.28619458530146008 3 6 2 2 1237645876861272071 1237653011352453159 3.3057687354212687e-3 3 3 2 2

Thanks a lot!

I got a solution by modifying the file before loading it into HDFS:

original input:

1237645876861272071  1237645876861272078  0.32965707824169882           3      3      2    2 1237645876861272071  1237649806768144454  0.26459062942243383           3      3      2    2 1237645876861272071  1237649806768144458  0.39759983562125262           3      6      2    2 1237645876861272071  1237649806768144467  0.42354716397014541           3      6      2    2 1237645876861272071  1237649806768144474  0.34070753757945649           3      6      2    2 

I applied a sed command to replace the white-space with ",":

# Turn every run of white-space into a single comma, then drop the comma
# left at the end of a line by trailing white-space.
# Output goes to a temporary file first: redirecting sed's output onto its
# own input (`sed ... f > f`) makes the shell truncate the file before sed
# reads it, which leaves an empty file.
# Note: `\s` and `\+` are GNU sed extensions.
sed -e 's/\s\+/,/g' -e 's/,$//' neighbors.csv > neighbors.csv.tmp &&
    mv neighbors.csv.tmp neighbors.csv

The commands replace each run of white-space with a comma and remove any trailing comma.

modified input:

1237645876861272071,1237645876861272078,0.32965707824169882,3,3,2,2 1237645876861272071,1237649806768144454,0.26459062942243383,3,3,2,2 1237645876861272071,1237649806768144458,0.39759983562125262,3,6,2,2 1237645876861272071,1237649806768144467,0.42354716397014541,3,6,2,2 

Loaded the file into HDFS:

# Upload the comma-separated file into the directory used as the external table's location.
hdfs dfs -put neighbors.csv /user/root/test/hive/ 

create table statement:

-- External table over the comma-separated file uploaded to
-- /user/root/test/hive/. Being external, dropping the table leaves the
-- files in that directory untouched.
-- The two ids and the distance are declared as strings here.
create external table if not exists test_neighbors (
    objid         string,
    neighborobjid string,
    distance      string,
    type          smallint,
    neighbortype  smallint,
    mode          tinyint,
    neighbormode  tinyint
)
row format delimited
fields terminated by ','
stored as textfile
location '/user/root/test/hive/';

Comments

Popular posts from this blog

Magento/PHP - Get phones on all members in a customer group -

php - Bypass Geo Redirect for specific directories -

php - .htaccess mod_rewrite for dynamic url which has domain names -