欢迎来到尧图网

客户服务 关于我们

您的位置:首页 > 科技 > 名人名企 > seatunnel配置mysql2hive

seatunnel配置mysql2hive

2025/5/22 13:50:04 来源:https://blog.csdn.net/m0_53842576/article/details/146573307  浏览:    关键词:seatunnel配置mysql2hive

SeaTunnel安装教程

# ====执行流程
# 下载,解压
# https://mirrors.aliyun.com/apache/seatunnel/2.3.8/?spm=a2c6h.25603864.0.0.2e2d3f665eBj1E
# https://blog.csdn.net/taogumo/article/details/143608532
tar -zxvf apache-seatunnel-2.3.8-bin.tar.gz -C /opt/module/ 
# 改名
mv /opt/module/apache-seatunnel-2.3.8 /opt/module/seatunnel
# 导入连接器 /seatunnel/connectors/
# 链接: https://pan.baidu.com/s/1Q4lTMtiBWlP5-3epmCC6jw?pwd=ejkx 提取码: ejkx 
mysql hive hadoop
# 测试,可以正常执行,说明安装成功
cd /opt/module/seatunnel/ 
./bin/seatunnel.sh \
--config ./config/v2.batch.config.template \
-m local

模拟数据到hive-fake2hive

编辑测试脚本fake2hive.config ,source为模拟数据,sink配置hive

env {
  parallelism = 1
  job.mode = "BATCH"
  job.name = "HiveSinkExample"
}
source {
  FakeSource {
    # 示例数据源
    schema = {
      fields {
        id = int
        name = string
        score = double
      }
    }
    rows = [
      { kind = INSERT, fields = [1, "Alice", 90.5] },
      { kind = INSERT, fields = [2, "Bob", 85.0] },
      { kind = INSERT, fields = [3, "Charlie", 92.0] }
    ]
  }
}
sink {
  Hive {
    table_name = "default.test_hive_sink"
    metastore_uri = "thrift://hadoop1:9083"
    hdfs_site_path = "/opt/module/hadoop/etc/hadoop/hdfs-site.xml"
    hive_site_path = "/opt/module/hive/conf/hive-site.xml"
    save_mode = "append"
    file_format = "text"  # 必须与Hive表存储格式一致
  }
}

配置hive连接,并启动同步脚本

# 上传对应连接器
connector-hive-2.3.8.jar
connector-file-hadoop-2.3.8.jar
# 将hive和hadoop的相关依赖包复制到seatunnel的lib下(本地集群hive为3.1.3版本,hadoop为3.3.4,spark为3.3.1)
cp /opt/module/hive/lib/hive-metastore-3.1.3.jar /opt/module/seatunnel/lib/
cp /opt/module/hive/lib/hive-exec-3.1.3.jar /opt/module/seatunnel/lib/
cp /opt/module/hive/lib/libfb303-0.9.3.jar /opt/module/seatunnel/lib/
cp $HADOOP_HOME/share/hadoop/common/*.jar /opt/module/seatunnel/lib/
cp $HADOOP_HOME/share/hadoop/hdfs/*.jar /opt/module/seatunnel/lib/
# 先启动metastore服务,前后台启动命令
hive --service metastore
nohup hive --service metastore > metastore.log 2>&1 &
# 在hive cli中执行建表语句,创建测试表,配置中设置了自动建表但没生效
CREATE TABLE IF NOT EXISTS default.test_hive_sink (id INT,name STRING,score DOUBLE
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','  
STORED AS TEXTFILE;  
# 执行数据同步命令
cd /opt/module/seatunnel/ 
./bin/seatunnel.sh \
--config ./config/fake2hive.config \
-m local #如果去掉,需要单独配置spark或flink分布式引擎
# 验证数据
hive --database default -e "SELECT * FROM test_hive_sink;"

mysql2console

先在MySQL中创建表并导入数据:使用DBeaver可以直接把数据从数据库1导入数据库2;也可以不预先建表,直接将表结构连同数据一起从数据库1导入数据库2。

创建配置文件,主要是source的设置

# Defining the runtime environment
env {
  parallelism = 4
  job.mode = "BATCH"
  job.name = "MysqlExample"
}
source {
  Jdbc {
    url = "jdbc:mysql://hadoop1:3306/finance?serverTimezone=GMT%2b8&useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true"
    driver = "com.mysql.cj.jdbc.Driver"
    connection_check_timeout_sec = 100
    user = "root"
    password = "xx"
    query = "select * from index_def limit 16"
  }
}
sink {
  Console {}
}

执行

# 将MySQL的JDBC驱动jar包放到seatunnel的plugins目录下
# /opt/module/seatunnel/plugins
mysql-connector-j-8.0.31.jar
# 启动;注意配置中source的插件名必须写Jdbc,写成MYSQL会报错
cd /opt/module/seatunnel/ 
./bin/seatunnel.sh \
--config ./config/mysql2console.config \
-m local

mysql2hive

在hive中创建要同步的表

先创建数据库,CREATE DATABASE IF NOT EXISTS finance;

编辑配置脚本mysql2hive.config

env {
  parallelism = 1
  job.mode = "BATCH"
  job.name = "HiveSinkExample"
}
source {
  Jdbc {
    url = "jdbc:mysql://hadoop1:3306/finance?serverTimezone=GMT%2b8&useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true"
    driver = "com.mysql.cj.jdbc.Driver"
    connection_check_timeout_sec = 100
    user = "root"
    password = "xx"
    query = "select * from index_def"
  }
}
sink {
  Hive {
    table_name = "finance.index_def"
    metastore_uri = "thrift://hadoop1:9083"
    hdfs_site_path = "/opt/module/hadoop/etc/hadoop/hdfs-site.xml"
    hive_site_path = "/opt/module/hive/conf/hive-site.xml"
    save_mode = "append"
    file_format = "text"  # 必须与Hive表存储格式一致
  }
}

启动

cd /opt/module/seatunnel/ 
./bin/seatunnel.sh \
--config ./config/mysql2hive.config \
-m local

同步多张表

env {
  parallelism = 1
  job.mode = "BATCH"
  job.name = "HiveSinkExample"
}
source {
  Jdbc {
    name = "source1"
    url = "jdbc:mysql://hadoop1:3306/finance?serverTimezone=GMT%2b8&useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true"
    driver = "com.mysql.cj.jdbc.Driver"
    connection_check_timeout_sec = 100
    user = "root"
    password = "xx"
    query = "select * from index_def1"
    result_table_name = "index_def1_result"
  }
  Jdbc {
    name = "source2"
    url = "jdbc:mysql://hadoop1:3306/finance?serverTimezone=GMT%2b8&useUnicode=true&characterEncoding=UTF-8&rewriteBatchedStatements=true"
    driver = "com.mysql.cj.jdbc.Driver"
    connection_check_timeout_sec = 100
    user = "root"
    password = "xx"
    query = "select * from index_def2"
    result_table_name = "index_def2_result"
  }
}
sink {
  Hive {
    name = "sink1"
    table_name = "finance.index_def1"
    metastore_uri = "thrift://hadoop1:9083"
    hdfs_site_path = "/opt/module/hadoop/etc/hadoop/hdfs-site.xml"
    hive_site_path = "/opt/module/hive/conf/hive-site.xml"
    save_mode = "append"
    file_format = "text"
    source_table_name = "index_def1_result"
  }
  Hive {
    name = "sink2"
    table_name = "finance.index_def2"
    metastore_uri = "thrift://hadoop1:9083"
    hdfs_site_path = "/opt/module/hadoop/etc/hadoop/hdfs-site.xml"
    hive_site_path = "/opt/module/hive/conf/hive-site.xml"
    save_mode = "append"
    file_format = "text"
    source_table_name = "index_def2_result"
  }
}

启动

cd /opt/module/seatunnel/ 
./bin/seatunnel.sh \
--config ./config/n2hive.config \
-m local

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com

热搜词