之前跑的数据是 10万,报的下面第一个错,不过数据跑出来了。这次跑的是 100万,两个错都有报,程序跑了1个小时直接退出了。
程序如下:
// Spark / TiSpark configuration for the TiDB copy job.
val sparkConf = new SparkConf()
  .set("spark.app.name", "Tidb Test")
  .setIfMissing("spark.master", "local[4]")
  .setIfMissing("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
  // PD endpoints used by TiSpark to locate TiKV regions.
  .setIfMissing("spark.tispark.pd.addresses", "http://172.16.1.131:2379,http://172.16.1.132:2379,http://172.16.1.134:2379")
  // TiDB server used for JDBC metadata / writes.
  .setIfMissing("spark.tispark.tidb.addr", "172.16.1.130")
  .setIfMissing("spark.tispark.tidb.port", "3390")
  .setIfMissing("spark.tispark.tidb.user", "root")
  .setIfMissing("spark.tispark.tidb.password", "example")
  .setIfMissing("spark.tispark.write.allow_spark_sql", "true")
  .setIfMissing("spark.tispark.write.without_lock_table", "true")
// NOTE(review): the previous bare `.setIfMissing("writeConcurrency", "24")` was
// removed — "writeConcurrency" is not a Spark/TiSpark conf key, so it was
// silently ignored here. TiSpark reads it as a data-source write option, which
// writeTarget already passes via `.option("writeConcurrency", 24)`.
val spark = SparkSession.builder.config(sparkConf).getOrCreate()
...
/** Copies up to 1,000,000 rows from `law.cases` into `test.target_cases`
  * through the TiSpark "tidb" data source (append mode).
  *
  * @param sqlContext context used for both the read and the write
  */
def writeTarget(sqlContext: SQLContext): Unit = {
  // Source: the `cases` table of the `law` database, capped at one million rows.
  val source = sqlContext.read
    .format("tidb")
    .option("database", "law")
    .option("table", "cases")
    .load()
    .limit(1000000)

  // Sink: append into test.target_cases with the same TiSpark write tuning.
  source.write
    .format("tidb")
    .option("database", "test")
    .option("table", "target_cases")
    .option("regionSplitNum", 20)
    .option("writeConcurrency", 24)
    .option("delay-clean-table-lock", 60000)
    .mode("append")
    .save()
}
前期报错:
UNKNOWN: rpc error: code = Unavailable desc = not leader
后期报错:
com.pingcap.tikv.exception.KeyException: tikv restart txn Txn(Mvcc(TxnLockNotFound { start_ts: 411772526951137286, commit_ts: 411773218940780545, key: [116, 128, 0, 0, 0, 0, 0, 0, 102, 95, 105, 128, 0, 0, 0, 0, 0, 0, 1, 1, 98, 48, 48, 48, 56, 56, 54, 56, 255, 54, 49, 54, 55, 0, 0, 0, 0, 251] }))