Problem initializing Spark

val sparkConf = new SparkConf().
  setIfMissing("spark.master", "local[*]").
  setIfMissing("spark.app.name", getClass.getName).
  setIfMissing("spark.sql.extensions", "org.apache.spark.sql.TiExtensions").
  setIfMissing("spark.tispark.pd.addresses", "192.168.98.63:32107").
  setIfMissing("spark.tispark.tidb.addr", "192.168.98.63").
  setIfMissing("spark.tispark.tidb.port", "30044")
// set("spark.jars.packages", "mysql:mysql-connector-java:5.1.39")

val spark = SparkSession.builder.config(sparkConf).getOrCreate()

val student = spark.sql("select * from student")
// import spark.implicits._
// student is already a DataFrame, so an extra toDF() call is a no-op.
student.write.
   format("tidb").
   option("tidb.user", "root").
   option("tidb.password", "").
   option("database", "chenxinhui").
   option("table", "student3").
   mode("append").
   save()
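
Before the write, a quick sanity check can confirm the TiSpark settings actually took effect; a minimal sketch, assuming the spark session built above:

// Read back the keys set via setIfMissing above; a wrong or missing value
// here would explain the PD client failing to initialize.
println(spark.conf.get("spark.sql.extensions"))       // expect org.apache.spark.sql.TiExtensions
println(spark.conf.get("spark.tispark.pd.addresses")) // expect 192.168.98.63:32107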

Error:
Exception in thread "main" java.lang.NullPointerException: Failed to init client for PD cluster.
at com.pingcap.com.google.common.base.Preconditions.checkNotNull(Preconditions.java:906)
at com.pingcap.tikv.PDClient.initCluster(PDClient.java:500)
at com.pingcap.tikv.PDClient.&lt;init&gt;(PDClient.java:100)
at com.pingcap.tikv.PDClient.createRaw(PDClient.java:110)
at com.pingcap.tikv.TiSession.getPDClient(TiSession.java:128)
at com.pingcap.tikv.TiSession.getTimestamp(TiSession.java:112)
at com.pingcap.tikv.TiSession.createSnapshot(TiSession.java:116)
at com.pingcap.tispark.statistics.StatisticsManager$.initialize(StatisticsManager.scala:274)
at com.pingcap.tispark.statistics.StatisticsManager$.initStatisticsManager(StatisticsManager.scala:266)
at org.apache.spark.sql.TiContext.&lt;init&gt;(TiContext.scala:63)
at org.apache.spark.sql.TiExtensions.getOrCreateTiContext(TiExtensions.scala:47)
at org.apache.spark.sql.TiExtensions.$anonfun$apply$1(TiExtensions.scala:34)
at org.apache.spark.sql.extensions.TiParser.tiContext$lzycompute(TiParser.scala:45)
at org.apache.spark.sql.extensions.TiParser.tiContext(TiParser.scala:45)
at org.apache.spark.sql.extensions.TiParser.org$apache$spark$sql$extensions$TiParser$$needQualify(TiParser.scala:126)
at org.apache.spark.sql.extensions.TiParser$$anonfun$1.applyOrElse(TiParser.scala:58)
at org.apache.spark.sql.extensions.TiParser$$anonfun$1.applyOrElse(TiParser.scala:57)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$1(TreeNode.scala:317)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:73)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:317)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown(AnalysisHelper.scala:171)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown$(AnalysisHelper.scala:169)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:322)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:407)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:243)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:405)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:358)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:322)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown(AnalysisHelper.scala:171)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown$(AnalysisHelper.scala:169)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:306)
at org.apache.spark.sql.extensions.TiParser.parsePlan(TiParser.scala:99)
at org.apache.spark.sql.SparkSession.$anonfun$sql$2(SparkSession.scala:613)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:613)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:610)
at com.supcon.test.TisparkTest$.main(TisparkTest.scala:21)
at com.supcon.test.TisparkTest.main(TisparkTest.scala)


What is the problem here?

Please help confirm the following:

  1. Is PD healthy? Take a look at the PD log;
  2. Is it only select * from student that TiSpark cannot query, or do all queries fail (one way to check is sketched below)? How much data does select * from student return? What is the load on TiSpark?
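
One way to narrow this down; a hedged sketch, assuming TiParser only initializes its PD-backed TiContext when a query actually references a table, which is what the stack trace above suggests:

// "select 1" references no table, so it should not touch TiContext or
// the PD client; the student query does. If only the second call throws,
// the failure is isolated to TiSpark's PD connection.
spark.sql("select 1").show()
spark.sql("select * from student").show()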

I'm testing in IDEA, and there are only a few rows of data.

If I add setIfMissing("spark.sql.extensions", "org.apache.spark.sql.TiExtensions"), it reports "Failed to init client for PD cluster".
If I comment out setIfMissing("spark.sql.extensions", "org.apache.spark.sql.TiExtensions"), it reports that the table does not exist.


Can this PD be connected to? Please confirm.

How do I confirm that?

I'm using ktconnect on Windows; that part should be fine.

1. Use netcat to check whether the port is reachable (see the sketch below for an in-code alternative);
2. Search the PD log for messages showing the connection came in.
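
If netcat isn't handy on Windows, the same reachability check can be done from Scala; a minimal sketch, using the PD address from the config above:

import java.net.{InetSocketAddress, Socket}

// Returns true if a TCP connection to host:port succeeds within the
// timeout; equivalent to `nc -vz host port` or `telnet host port`.
def portReachable(host: String, port: Int, timeoutMs: Int = 3000): Boolean = {
  val socket = new Socket()
  try {
    socket.connect(new InetSocketAddress(host, port), timeoutMs)
    true
  } catch {
    case _: Exception => false
  } finally {
    socket.close()
  }
}

println(s"PD reachable: ${portReachable("192.168.98.63", 32107)}")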

Could you be more specific, please?

Are these two the same issue?


Yes, pretty much.

  1. Verify the port status with telnet or netcat;
  2. Check the PD log for connection entries from the corresponding IP; you can also post the PD log from around the time of the operation.

  1. Verify the port status with telnet or netcat: telnet 192.168.98.63 2379 does not connect

Could it be that, since I'm on Windows, ktconnect isn't working and I can't reach the k8s cluster?

Log in through the jump host and check; we need the PD log to diagnose the problem.


The k8s cluster wasn't deployed by me, and after searching for a long time I can't find where the logs are.


I've checked the log and found no error messages.

pdlog.txt (106.7 KB)

Does this cover the time when the error occurred?