Spark SQL: why does ORDER BY count DESC fail?
There is a table with 2 columns: books and readers of these books, where books
and readers are book and reader IDs, respectively. When trying to order readers
by the number of books read, AbstractSparkSQLParser throws an exception:
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.log4j.Logger
import org.apache.log4j.Level
import org.apache.spark.sql.functions._

object Small {

  case class Book(book: Int, reader: Int)

  val recs = Array(
    Book(book = 1, reader = 30),
    Book(book = 2, reader = 10),
    Book(book = 3, reader = 20),
    Book(book = 1, reader = 20),
    Book(book = 1, reader = 10),
    Book(book = 1, reader = 40),
    Book(book = 2, reader = 40),
    Book(book = 2, reader = 30))

  def main(args: Array[String]) {
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Set environment
    val conf = new SparkConf()
      .setMaster("local[5]")
      .setAppName("Small")
      .set("spark.executor.memory", "2g")
    val sc = new SparkContext(conf)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    import sqlContext.implicits._

    val df = sc.parallelize(recs).toDF()
    val readerGroups = df.groupBy("reader").count()
    readerGroups.show()
    readerGroups.registerTempTable("readerGroups")
    readerGroups.printSchema()
    // "SELECT reader, count FROM readerGroups ORDER BY count DESC"
    val readerGroupsSorted = sqlContext.sql("SELECT * FROM readerGroups ORDER BY count DESC")
    readerGroupsSorted.show()
    println("group cnt: " + readerGroupsSorted.count())
  }
}
And here is the output — the `groupBy` works right:
reader  count
40      2
10      2
20      2
30      2
The resulting schema:
root |-- reader: integer (nullable = false) |-- count: long (nullable = false)
Yet `SELECT * FROM readerGroups ORDER BY count DESC`
fails with an exception (see below). In fact other SELECT
requests fail as well, except for `SELECT * FROM readerGroups`
and `SELECT reader FROM readerGroups`
— these work. Why is that?
And how do I make `ORDER BY count DESC`
work?
exception in thread "main" java.lang.runtimeexception: [1.43] failure: ``('' expected `desc' found select * readergroups order count desc ^ @ scala.sys.package$.error(package.scala:27) @ org.apache.spark.sql.catalyst.abstractsparksqlparser.apply(abstractsparksqlparser.scala:40) @ org.apache.spark.sql.sqlcontext$$anonfun$2.apply(sqlcontext.scala:134) @ org.apache.spark.sql.sqlcontext$$anonfun$2.apply(sqlcontext.scala:134) @ org.apache.spark.sql.sparksqlparser$$anonfun$org$apache$spark$sql$sparksqlparser$$others$1.apply(sparksqlparser.scala:96) @ org.apache.spark.sql.sparksqlparser$$anonfun$org$apache$spark$sql$sparksqlparser$$others$1.apply(sparksqlparser.scala:95) @ scala.util.parsing.combinator.parsers$success.map(parsers.scala:136) @ scala.util.parsing.combinator.parsers$success.map(parsers.scala:135) @ scala.util.parsing.combinator.parsers$parser$$anonfun$map$1.apply(parsers.scala:242) @ scala.util.parsing.combinator.parsers$parser$$anonfun$map$1.apply(parsers.scala:242) @ scala.util.parsing.combinator.parsers$$anon$3.apply(parsers.scala:222) @ scala.util.parsing.combinator.parsers$parser$$anonfun$append$1$$anonfun$apply$2.apply(parsers.scala:254) @ scala.util.parsing.combinator.parsers$parser$$anonfun$append$1$$anonfun$apply$2.apply(parsers.scala:254) @ scala.util.parsing.combinator.parsers$failure.append(parsers.scala:202) @ scala.util.parsing.combinator.parsers$parser$$anonfun$append$1.apply(parsers.scala:254) @ scala.util.parsing.combinator.parsers$parser$$anonfun$append$1.apply(parsers.scala:254) @ scala.util.parsing.combinator.parsers$$anon$3.apply(parsers.scala:222) @ scala.util.parsing.combinator.parsers$$anon$2$$anonfun$apply$14.apply(parsers.scala:891) @ scala.util.parsing.combinator.parsers$$anon$2$$anonfun$apply$14.apply(parsers.scala:891) @ scala.util.dynamicvariable.withvalue(dynamicvariable.scala:57) @ scala.util.parsing.combinator.parsers$$anon$2.apply(parsers.scala:890) @ 
scala.util.parsing.combinator.packratparsers$$anon$1.apply(packratparsers.scala:110) @ org.apache.spark.sql.catalyst.abstractsparksqlparser.apply(abstractsparksqlparser.scala:38) @ org.apache.spark.sql.sqlcontext$$anonfun$parsesql$1.apply(sqlcontext.scala:138) @ org.apache.spark.sql.sqlcontext$$anonfun$parsesql$1.apply(sqlcontext.scala:138) @ scala.option.getorelse(option.scala:120) @ org.apache.spark.sql.sqlcontext.parsesql(sqlcontext.scala:138) @ org.apache.spark.sql.sqlcontext.sql(sqlcontext.scala:933) @ small$.main(small.scala:60) @ small.main(small.scala) @ sun.reflect.nativemethodaccessorimpl.invoke0(native method) @ sun.reflect.nativemethodaccessorimpl.invoke(nativemethodaccessorimpl.java:57) @ sun.reflect.delegatingmethodaccessorimpl.invoke(delegatingmethodaccessorimpl.java:43) @ java.lang.reflect.method.invoke(method.java:606) @ com.intellij.rt.execution.application.appmain.main(appmain.java:140)
The problem is the name of the column: `count`. `count` is a reserved word in Spark SQL, so you can't use it in a query as a plain column name or sort field.
You can try escaping it with backticks:
SELECT * FROM readerGroups ORDER BY `count` DESC
The other option is to rename the column `count` to something different, e.g. numReaders or whatever...
Comments
Post a Comment