README.md

Blazer/数据传播者

Blazer意为传播者,主要实现不同数据库之间的数据同步,导出数据库表结构创建语句。

  1. 导出数据库表数据到txt/CSV文件(多线程)或者excel(.xlsx/.xls);

  2. 把txt文件或者excel中的数据导入到数据库表,支持基于javascript脚本对数据的处理;

  3. 翻译特定的数据库表结构ddl成其他数据库的语法,方便数据迁移;

  4. 数据库之间的数据同步;

  5. 目前支持数据有mysql/MariaDB,Greenplum,Oracle,DB2,PostgreSQL;

  6. 支持JDK支持平台,JDK1.8+

  7. 下一步计划1)HADOOP支持,2)XML数据文件支持,3)JSON数据文件支持


项目基于Java Spring Framework框架,简化了系统的配置,引入"管道"(PipeLine)的概念,一个管道包含多个任务,任务可以是数据同步、导入、导出或者表结构的导出。

在不同的XML中引入不同管道任务,实现不同的功能,其中pipeLine.xml描述管道的功能,需要根据需求进行定义,可以基于以下的命令内容copy出不同脚本功能,传入不同的"管道"实现不同的功能。


脚本

WINDOWS PipeLineRunner.bat

@echo off
CALL setenv.bat

SET JAVA_MARK=PipeLineRunner
SET JAVA_OPTS=" -Xms128m "
SET JAVA_OPTS=%JAVA_OPTS% -Xmx1024m"
rem SET JAVA_OPTS="%JAVA_OPTS% -Dfile.encoding=UTF-8"
SET JAVA_OPTS="%JAVA_OPTS% -Dfile.encoding=GBK"
SET JAVA_OPTS="%JAVA_OPTS% -DjavaMark=%JAVA_MARK%"

SET JAVA_CONF=./etc
SET JAVA_LIBPATH=./lib
SET JAVA_CLASSPATH=./classes;./bin;%JAVA_CONF%
SET JAVA_MAINCLASS=com.blazer.pipeline.PipeLineRunner
SET JAVA_EXEC=%JAVA_HOME%/bin/java

rem mk logs dir
if NOT EXIST "./logs" MKDIR "logs"
rem init TEMP_CLASSPATH
SET TEMP_CLASSPATH=
rem new setclasspath.bat
echo SET TEMP_CLASSPATH=%%TEMP_CLASSPATH%%;%%1> setclasspath.bat

FOR  %%i IN (%JAVA_LIBPATH%/*.jar) DO (
CALL setclasspath.bat %JAVA_LIBPATH%/%%i
)

SET JAVA_CLASSPATH=%JAVA_CLASSPATH%;%TEMP_CLASSPATH%
rem delete setclasspath.bat
DEL setclasspath.bat
echo %JAVA_CLASSPATH%

rem Display our environment
echo ===============================================================================  
echo Bootstrap Environment 
echo.  
echo JAVA_CLASSPATH =  %JAVA_CLASSPATH%
echo JAVA_CONF      =  %JAVA_CONF%  
echo JAVA_OPTS      =  %JAVA_OPTS%  
echo JAVA_HOME      =  %JAVA_HOME%  
echo JAVA           =  %JAVA_EXEC%  
echo.  
%JAVA_EXEC% -version
echo.  
echo ===============================================================================  
echo.  
  
%JAVA_EXEC% %JAVA_OPTS%  -classpath %JAVA_CLASSPATH% %JAVA_MAINCLASS% --config pipeLine.xml

echo run finished
PAUSE

LINUX UNIX PipeLineRunner.sh

#!/bin/bash
sh setenv.sh

JAVA_MARK=PipeLineRunner
JAVA_OPTS=" -Xms128m "
JAVA_OPTS="${JAVA_OPTS} -Xmx1024m"
JAVA_OPTS="${JAVA_OPTS} -Dfile.encoding=UTF-8"
JAVA_OPTS="${JAVA_OPTS} -DjavaMark=${JAVA_MARK}"

JAVA_CONF=./etc
JAVA_LIBPATH=./lib
JAVA_CLASSPATH=./classes:./bin
JAVA_MAINCLASS=com.blazer.pipeline.PipeLineRunner
JAVA_EXEC=${JAVA_HOME}/bin/java

export JAVA_CLASSPATH
export JAVA_LIBPATH

for LL in `ls $JAVA_LIBPATH/*.jar`
        do
                JAVA_CLASSPATH=$JAVA_CLASSPATH:$LL
               
done

export JAVA_CLASSPATH
# Display our environment
echo "========================================================================="
echo "  Bootstrap Environment"
echo ""
echo JAVA_CLASSPATH :  ${JAVA_CLASSPATH}
echo JAVA_CONF      :  ${JAVA_CONF}
echo JAVA_OPTS      :  ${JAVA_OPTS}
echo JAVA_HOME      :  ${JAVA_HOME}  
echo JAVA           :  ${JAVA_EXEC} 
${JAVA_EXEC} -version
echo ""
echo "========================================================================="
echo ""

${JAVA_EXEC} ${JAVA_OPTS} -classpath ${JAVA_CLASSPATH} ${JAVA_MAINCLASS} --config pipeLine.xml

echo run finished

"管道"(PipeLine)

1、实现Oracle到greenplum数据同步pipeLine
<?xml version="1.0" encoding="UTF-8"?>
<beans 	xmlns="http://www.springframework.org/schema/beans"
		xmlns:context="http://www.springframework.org/schema/context"
		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
		xmlns:util="http://www.springframework.org/schema/util"
		xsi:schemaLocation="
	        http://www.springframework.org/schema/beans     
	        http://www.springframework.org/schema/beans/spring-beans.xsd
	        http://www.springframework.org/schema/context 
	        http://www.springframework.org/schema/context/spring-context.xsd
	        http://www.springframework.org/schema/util     
	        http://www.springframework.org/schema/util/spring-util.xsd">
	<!-- Application properties configs  应用程序属性配置文件-->
	<bean id="propertySourcesPlaceholderConfigurer" class="org.springframework.context.support.PropertySourcesPlaceholderConfigurer">
	  <property name="locations"><list>
	   	 <value>/pipeline/config/applicationConfig.properties</value>
	  </list></property>
	  <property name="ignoreUnresolvablePlaceholders" value="true"/>
	</bean>
	
	<!-- enable component scanning (beware that this does not enable mapper scanning!) -->    
    <context:component-scan base-package="com.blazer" />
 	
 	<!-- Datastore configuration /数据源配置 -->
 	<import resource="database.xml"/>

 	<bean id="transData" class="com.blazer.trans.TransData">
 		<!--源数据库-->
		<property name="sourceDataSource" ref="datasource_oracle"/>
		<property name="fromUrl" value="${datasource_oracle.url}"/>
 		<property name="fromUser" value="${datasource_oracle.username}"/> 
 		<!--目标数据库-->
 		<property name="targetDataSource" ref="datasource_greenplum"/>
 		<property name="toUrl" value="${datasource_greenplum.url}"/>
 		<property name="toUser" value="${datasource_greenplum.username}"/> 
		<!-- FULL      在插入前进行删除,默认    -->
		<!-- INCREMENT 先清除条件相关数据,然后按照条件进行增量插入 -->
 		<property name="transType" value="FULL"/> 
 		<property name="tablesList" > 
 			<util:list  list-class="java.util.ArrayList">
			 	<bean class="com.db.TableDescribe">
					<!--源表名-->
 					<property name="tableName" value="STUDENT"/> 
					<!--目标表名-->
 					<property name="targetTableName" value="STUDENT"/> 
					<!--筛选条件-->
					<property name="whereSqlString" value="CLASSES='2020'"/>
 				</bean>
 				<bean class="com.db.TableDescribe">
 					<property name="tableName" value="CALENDAR"/> 
 					<property name="targetTableName" value="M_CALENDAR"/> 
 				</bean>
 			</util:list>
 		</property>
 	</bean>
 	
 	<!-- 配置执行的任务列表  -->
 	<util:list id="pipeLineTask" list-class="java.util.ArrayList">
 		<ref bean="transData"/>
 	</util:list>
</beans>
2、数据库数据导出到文件(csv,xlsx,xls)pipeLine
<?xml version="1.0" encoding="UTF-8"?>
<beans 	xmlns="http://www.springframework.org/schema/beans"
		xmlns:context="http://www.springframework.org/schema/context"
		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
		xmlns:util="http://www.springframework.org/schema/util"
		xsi:schemaLocation="
	        http://www.springframework.org/schema/beans     
	        http://www.springframework.org/schema/beans/spring-beans.xsd
	        http://www.springframework.org/schema/context 
	        http://www.springframework.org/schema/context/spring-context.xsd
	        http://www.springframework.org/schema/util     
	        http://www.springframework.org/schema/util/spring-util.xsd">
 	<!-- Application properties configs  应用程序属性配置文件-->
	<bean id="propertySourcesPlaceholderConfigurer" class="org.springframework.context.support.PropertySourcesPlaceholderConfigurer">
	  <property name="locations"><list>
	   	 <value>/pipeline/config/applicationConfig.properties</value>
	  </list></property>
	  <property name="ignoreUnresolvablePlaceholders" value="true"/>
	</bean>
 	
 	<!-- Datastore configuration /数据源配置 -->
 	<import resource="database.xml"/>
 	
	<!--基本配置-->
 	<bean id="exportBasicConfigure" class="com.blazer.export.file.BasicConfigure">
 		<property name="sourceDataSource" ref="datasource_oracle"/>
		<property name="fromUrl" value="${datasource_oracle.url}"/>
 		<property name="fromUser" value="${datasource_oracle.username}"/> 
		<!--每次写条数-->
 		<property name="commitNumber" value="400"/>
		<!--导出线程-->
		<property name="threadSize" value="1"/>
		<!--导出文件格式 csv/xlsx/xls-->
		<property name="fileType" value="csv"/>
		<!--导出文件路径-->
 		<property name="exportFilePath" value="D:/dmp/"/>
		<!--导出文件后缀 csv(.txt,.csv)/xlsx(.xlsx)/xls(.xls) -->
 		<property name="fileNameSuffix" value=".txt"/>
		<!--导出文件字段分割-->
 		<property name="terminatedString" value="|+|"/>
 	</bean>	
 	<!--导出基本配置-->	
 	<bean id="transDataExport_STUD" class="com.blazer.export.file.TransDataExport">
 		<property name="sourceDataSource" ref="datasource_oracle"/>
		<property name="fromUrl" value="${datasource_oracle.url}"/>
 		<property name="fromUser" value="${datasource_oracle.username}"/> 
		<!--导出表名-->
		<property name="tableName" value="STUDENT"/>
		<!--导出文件名,其中{yyyyMMdd}为日期格式-->
		<property name="outFileName" value="STUDENT_{yyyyMMdd}_000_000"/>
		<!--每次写条数-->
 		<property name="commitNumber" value="400"/>
		<!--导出线程-->
		<property name="threadSize" value="1"/>
		<!--导出文件格式 csv/xlsx/xls-->
		<property name="fileType" value="csv"/>
		<!--导出文件路径-->
 		<property name="exportFilePath" value="D:/dmp/"/>
		<!--导出文件后缀 csv(.txt,.csv)/xlsx(.xlsx)/xls(.xls) -->
 		<property name="fileNameSuffix" value=".txt"/>
		<!--导出文件字段分割-->
 		<property name="terminatedString" value="|+|"/>
		<!--导出表数据过滤条件-->
		<!--
		<property name="whereSqlString" value="where length(body)&gt;10000 and length(body)&lt;50000" and FLAG='_EXPORT_ETL_DATE_'/>-->
		<property name="whereSqlString" value=""/>
		<!--导出字段,如果是*则全表导出-->
 		<property name="selectFieldsString" value="
			STUD_NAME           ,
			STUD_SEX         
		"
		/>
 	</bean>

 	<!-- 配置执行的任务列表  -->
 	<util:list id="pipeLineTask" list-class="java.util.ArrayList">
		<ref bean="transDataExport_STUD"/>
 	</util:list>
</beans>
3、数据文件导入到数据库 pipeLine
<?xml version="1.0" encoding="UTF-8"?>
<beans 	xmlns="http://www.springframework.org/schema/beans"
		xmlns:context="http://www.springframework.org/schema/context"
		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
		xmlns:util="http://www.springframework.org/schema/util"
		xsi:schemaLocation="
	        http://www.springframework.org/schema/beans     
	        http://www.springframework.org/schema/beans/spring-beans.xsd
	        http://www.springframework.org/schema/context 
	        http://www.springframework.org/schema/context/spring-context.xsd
	        http://www.springframework.org/schema/util     
	        http://www.springframework.org/schema/util/spring-util.xsd">

 	<!-- Application properties configs  应用程序属性配置文件-->
	<bean id="propertySourcesPlaceholderConfigurer" class="org.springframework.context.support.PropertySourcesPlaceholderConfigurer">
	  <property name="locations"><list>
	   	 <value>/pipeline/config/applicationConfig.properties</value>
	  </list></property>
	  <property name="ignoreUnresolvablePlaceholders" value="true"/>
	</bean>
 	
 	<!-- Datastore configuration /数据源配置 -->
 	<import resource="database.xml"/>
 	
 	<bean id="transDataLoad_STUD" class="com.blazer.load.file.runner.TransDataLoadFile">
		<!--导入数据源-->
 		<property name="sourceDataSource" ref="datasource_oracle"/>
		<!--每次提交数据量-->
 		<property name="commitNumber" value="100"/>
		<!--字段最长限制-->
 		<property name="limitTextSize" value="0"/>
		<!--导入目标表名-->
 		<property name="tableName" value="STUDENT"/>
		<!--导入文件名-->
 		<property name="loadFileName" value="student_2020"/>
		<!--导入文件扩展名类型 csv/xlsx/xls-->
 		<property name="fileType" value="xls"/>
		<!--导入文件路径-->
 		<property name="loadFilePath" value="D:/dmp/"/>
		<!--是否跳过首行 true是跳过,false不跳过,默认为false-->
 		<property name="skipFirstRow" value="true"/>
 		<!--导入文件后缀-->
 		<property name="fileNameSuffix" value=".xls"/>
		<!--导入csv类型文件分隔符-->
 		<property name="terminatedString" value="|+|"/>
		<!--导入字段列表-->
 		<property name="listTableColumns" > 
 			<util:list  list-class="java.util.ArrayList">
				<bean class="com.db.TableColumns">
					<!--导入字段名-->
					<property name="columnName" value="STUD_NAME"/>
					<!--导入字段类型-->
 					<property name="dataType" value="VARCHAR"/>
					<!--导入字段是否跳过-->
					<property name="skip" value="false"/>
					<!--导入字段为固定值-->
					<property name="fixed" value="false"/>
					<!--导入字段默认值,配合fixed使用-->
					<property name="defaultValue" value=""/>
				</bean>
				<bean class="com.db.TableColumns">
					<property name="columnName" value="STUD_SEX"/>
 					<property name="dataType" value="VARCHAR"/>
					<!--导入字段基于javascript转换脚本-->
					<property name="convert" value="
						if(dataValue=='M'){
							returnValue='男';
						}else if(columns[1]=='男'){
							returnValue='男';
						}
						if(dataValue=='F'){
							returnValue='女';
						}
					"/>
				</bean>
		 	</util:list>
 		</property>
 		
 	</bean>
 	
 	<bean id="transDataLoad" class="com.blazer.load.file.TransDataLoad">
		<!--默认数据源-->
 		<property name="sourceDataSource" ref="datasource_oracle"/>
		<property name="fromUrl" value="${datasource_oracle.url}"/>
 		<property name="fromUser" value="${datasource_oracle.username}"/> 
		<!--默认导入线程数-->
 		<property name="threadSize" value="10"/>
		<!--默认导入文件路径-->
 		<property name="loadFilePath" value="D:/dmp/"/>
		<!--导入任务列表-->
 		<property name="transDataLoadFileList" > 
 			<util:list  list-class="java.util.ArrayList">
				<ref bean="transDataLoad_STUD"/>
		 	</util:list>
 		</property>
 	</bean>
 
 	<!-- 配置执行的任务列表  -->
 	<util:list id="pipeLineTask" list-class="java.util.ArrayList">
		<ref bean="transDataLoad"/>
 	</util:list>
</beans>
4、数据表结构导出特定数据库DDL pipeLine
<?xml version="1.0" encoding="UTF-8"?>
<beans 	xmlns="http://www.springframework.org/schema/beans"
		xmlns:context="http://www.springframework.org/schema/context"
		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
		xmlns:util="http://www.springframework.org/schema/util"
		xsi:schemaLocation="
	        http://www.springframework.org/schema/beans     
	        http://www.springframework.org/schema/beans/spring-beans.xsd
	        http://www.springframework.org/schema/context 
	        http://www.springframework.org/schema/context/spring-context.xsd
	        http://www.springframework.org/schema/util     
	        http://www.springframework.org/schema/util/spring-util.xsd">
	<!-- Application properties configs  应用程序属性配置文件-->
	<bean id="propertySourcesPlaceholderConfigurer" class="org.springframework.context.support.PropertySourcesPlaceholderConfigurer">
	  <property name="locations"><list>
	   	 <value>/pipeline/config/applicationConfig.properties</value>
	  </list></property>
	  <property name="ignoreUnresolvablePlaceholders" value="true"/>
	</bean>
 	<context:component-scan base-package="com.blazer" />
 	<!-- Datastore configuration /数据源配置 -->
 	<import resource="database.xml"/>
	<!-- DDL导出描述文件 -->
 	<bean id="dllExport" class="com.blazer.ddl.DDLExport">
		<!-- 导出数据源 -->
 		<property name="dataSource" ref="datasource_oracle"/>
		<property name="url" value="${datasource_oracle.url}"/>
 		<property name="user" value="${datasource_oracle.username}"/> 
		<!-- 导出数据库的owner或者schema -->
 		<property name="owner" value="DW"/>
		<!-- 导出创建表的前缀 -->
 		<property name="tablePrefix" value="DWMART."/> 
		<!-- 导出权限赋予用户 -->
 		<property name="grantUser" value=""/> 
		<!-- 导出创建表的engine针对mysql -->
 		<property name="engine" value=""/>
		<!-- 导出目标数据库类型 -->
 		<property name="toDbType" value="Greenplum"/>
		<!-- 导出源数据库的表名列表 -->
 		<property name="configFilePath" value="#{systemProperties['APP_PATH']}/conf/oraTable2Greenplum_user.txt"/>
		<!-- 导出创建语句的输出文件 -->
 		<property name="exportFilePath" value="#{systemProperties['APP_PATH']}/export/exportOracle2Greenplum.sql"/>
 	</bean>
 	
 	<!-- 配置执行的任务列表  -->
 	<util:list id="pipeLineTask" list-class="java.util.ArrayList">
 		<ref bean="dllExport"/>
 	</util:list>
</beans>

项目简介

当前项目暂无项目简介

发行版本

当前项目没有发行版本

贡献者 1

shimingxy @shimingxy

开发语言

  • Java 97.2 %
  • Batchfile 2.5 %
  • Shell 0.4 %