Error: loading a tab-separated CSV file into Hive
i cherry!
-- Register the third-party CSV SerDe jar so Hive can resolve the SerDe class below.
add jar /home/hadoop/csv-serde.jar;

create database if not exists test;

use test;

-- Target table with 7 columns, parsed by the CSV SerDe.
-- The SerDe class name and the "separatorChar" property key are case-sensitive.
-- NOTE(review): "\t" declares tab-delimited input; confirm the data file really
-- uses tabs rather than spaces between fields.
create table if not exists test.neighbors (
    objid         bigint,
    neighborobjid bigint,
    distance      float,
    type          smallint,
    neighbortype  smallint,
    mode          tinyint,
    neighbormode  tinyint
)
row format serde 'com.bizo.hive.serde.csv.CSVSerde'
with serdeproperties (
    "separatorChar" = "\t"
)
stored as textfile;
I create a table with 7 attributes. Then I load the file into HDFS.
# Copy the local CSV file into the HDFS staging directory.
hadoop fs -put \
    /home/hadoop/hive/csv/neighbors.csv \
    /usr/hadoop/src/test/org/apache/hadoop/hdfs
I load the data into the neighbors table.
-- Load the staged HDFS file into the neighbors table of the test database,
-- replacing any rows already in the table (overwrite).
use test;

-- The SerDe jar is added again here, before the table that uses it is loaded.
add jar /home/hadoop/csv-serde.jar;

load data inpath '/usr/hadoop/src/test/org/apache/hadoop/hdfs/neighbors.csv'
overwrite into table neighbors;
There should be 7 attributes.
select * from neighbors;
1237645876861272110 1237663542075064469 0.11240466522238592 6 3 2 2 null null null null null null
1237645876861272110 1237663542075064470 0.21434291786709292 6 3 2 2 null null null null null null
1237645876861272110 1237663542075064471 0.23417060042663973 6 6 2 2 null null null null null null
1237645876861272110 1237666299449507934 1.2029018814611406e-3 6 6 2 2 null null null null null null
1237645876861272110 1237666299449507935 0.11229523899396199 6 3 2 2 null null null null null null
1237645876861272110 1237666299449507936 0.21438380604022358 6 3 2 2 null null null null null null
1237645876861272110 1237666338097660047 1.7811731642581635e-3 6 6 2 2 null null null null null null
1237645876861272110 1237666338097660048 0.11403807309757029 6 3 2 2 null null null null null null
select objid from neighbors;
1237645876861272110 1237663542075064468 1.0369065814116625e-3 6 6 2 2
1237645876861272110 1237663542075064469 0.11240466522238592 6 3 2 2
1237645876861272110 1237663542075064470 0.21434291786709292 6 3 2 2
1237645876861272110 1237663542075064471 0.23417060042663973 6 6 2 2
1237645876861272110 1237666299449507934 1.2029018814611406e-3 6 6 2 2
1237645876861272110 1237666299449507935 0.11229523899396199 6 3 2 2
1237645876861272110 1237666299449507936 0.21438380604022358 6 3 2 2
1237645876861272110 1237666338097660047 1.7811731642581635e-3 6 6 2 2
1237645876861272110 1237666338097660048 0.11403807309757029 6 3 2 2
Only 1 attribute has values; the others are null.
Here is neighbors.csv. The neighbors.csv file is big (100 GB), and its 7 attributes end up in 1 column. I take a 92 KB sample as an example.
1237645876861272071 1237645876861272078 0.32965707824169882 3 3 2 2 1237645876861272071 1237649806768144454 0.26459062942243383 3 3 2 2 1237645876861272071 1237649806768144458 0.39759983562125262 3 6 2 2 1237645876861272071 1237649806768144467 0.42354716397014541 3 6 2 2 1237645876861272071 1237649806768144474 0.34070753757945649 3 6 2 2 1237645876861272071 1237649909843296374 0.18245847995970663 3 3 2 2 1237645876861272071 1237649909843296375 5.4353586098331253e-3 3 3 2 2 1237645876861272071 1237649909843296380 0.38719702159199682 3 3 2 2 1237645876861272071 1237649909843296384 0.33965518492584618 3 6 2 2 1237645876861272071 1237649909843296393 0.43922183220216948 3 6 2 2 1237645876861272071 1237649909843296395 0.49000926139721906 3 3 2 2 1237645876861272071 1237649909843296409 0.44651142644641773 3 6 2 2 1237645876861272071 1237649909843296411 0.39216278873263422 3 3 2 2 1237645876861272071 1237649909843296417 0.31949073955943957 3 3 2 2 1237645876861272071 1237649909843296422 0.33711157914306916 3 3 2 2 1237645876861272071 1237649909843361812 0.26280159296107986 3 3 2 2 1237645876861272071 1237649909843361858 0.37196149776646037 3 3 2 2 1237645876861272071 1237649909843361862 0.49007799303090271 3 6 2 2 1237645876861272071 1237650009161334811 0.18281445894897566 3 6 2 2 1237645876861272071 1237650009161334813 7.8938692921539429e-3 3 3 2 2 1237645876861272071 1237650009161334885 0.43000492776202714 3 6 2 2 1237645876861272071 1237653011352453152 0.18222425001383408 3 3 2 2 1237645876861272071 1237653011352453154 0.28619458530146008 3 6 2 2 1237645876861272071 1237653011352453159 3.3057687354212687e-3 3 3 2 2
Thanks a lot!
I got a solution by modifying the file before loading it into HDFS:
original input:
1237645876861272071 1237645876861272078 0.32965707824169882 3 3 2 2 1237645876861272071 1237649806768144454 0.26459062942243383 3 3 2 2 1237645876861272071 1237649806768144458 0.39759983562125262 3 6 2 2 1237645876861272071 1237649806768144467 0.42354716397014541 3 6 2 2 1237645876861272071 1237649806768144474 0.34070753757945649 3 6 2 2
Applied the sed command to replace whitespace with ",":
# Replace every run of whitespace with a single comma, then strip any trailing
# comma left at the end of a line.
# The result goes to a temporary file first: redirecting sed's output onto its
# own input file would truncate that file before sed reads it.
sed -e 's/\s\+/,/g' -e 's/,$//' neighbors.csv > neighbors.csv.tmp &&
    mv neighbors.csv.tmp neighbors.csv
The command replaces whitespace with commas.
modified input:
1237645876861272071,1237645876861272078,0.32965707824169882,3,3,2,2 1237645876861272071,1237649806768144454,0.26459062942243383,3,3,2,2 1237645876861272071,1237649806768144458,0.39759983562125262,3,6,2,2 1237645876861272071,1237649806768144467,0.42354716397014541,3,6,2,2
Loaded the file into HDFS:
# Upload the comma-separated file to the directory used as the table location.
hdfs dfs -put \
    neighbors.csv \
    /user/root/test/hive/
create table statement:
-- External table over the comma-separated file(s) in /user/root/test/hive/.
-- Uses Hive's built-in delimited row format, so no extra SerDe jar is required.
-- The first three columns are declared as string; the remaining four are small integers.
create external table if not exists test_neighbors (
    objid         string,
    neighborobjid string,
    distance      string,
    type          smallint,
    neighbortype  smallint,
    mode          tinyint,
    neighbormode  tinyint
)
row format delimited
fields terminated by ','
stored as textfile
location '/user/root/test/hive/';
Post a Comment