目录
一、Ambari表信息
1.1.hoststate(主机状态表)
1.2.hostcomponentstate(主机组件状态表)
1.3. hosts(主机表)
1.4.clusters(集群表)
二、Ambari组件信息同步实现
2.1.编写table.ini 的数据库配置文件
2.2.Shell 实现表信息的同步
mysql> desc hoststate;
+-------------------+--------------+------+-----+---------+
| Field | Type | Null | Key | Default |
+-------------------+--------------+------+-----+---------+
| agent_version | varchar(255) | NO | | NULL | agent版本
| available_mem | bigint(20) | NO | | NULL | 可用内存
| current_state | varchar(255) | NO | | NULL | 当前状态
| health_status | varchar(255) | YES | | NULL | 健康状态
| host_id | bigint(20) | NO | PRI | NULL | 主机id
| time_in_state | bigint(20) | NO | | NULL |
| maintenance_state | varchar(512) | YES | | NULL | 维护模式状态
+-------------------+--------------+------+-----+---------+
mysql> desc hostcomponentstate;
+-----------------+--------------+------+-----+---------+
| Field | Type | Null | Key | Default |
+-----------------+--------------+------+-----+---------+
| id | bigint(20) | NO | PRI | NULL |
| cluster_id | bigint(20) | NO | | NULL |
| component_name | varchar(100) | NO | MUL | NULL | 组件名称
| version | varchar(32) | NO | | UNKNOWN | 版本
| current_state | varchar(255) | NO | | NULL |
| last_live_state | varchar(255) | NO | | UNKNOWN | 上一次活跃状态
| host_id | bigint(20) | NO | MUL | NULL |
| service_name | varchar(100) | NO | | NULL | 组件对应服务名称
| upgrade_state | varchar(32) | NO | | NONE |
+-----------------+--------------+------+-----+---------+
mysql> desc hosts;
+------------------------+---------------+------+-----+---------+
| Field | Type | Null | Key | Default |
+------------------------+---------------+------+-----+---------+
| host_id | bigint(20) | NO | PRI | NULL |
| host_name | varchar(255) | NO | UNI | NULL | 主机名称
| cpu_count | int(11) | NO | | NULL | cpu数量
| cpu_info | varchar(255) | NO | | NULL | cpu信息
| discovery_status | varchar(2000) | NO | | NULL |
| host_attributes | longtext | NO | | NULL | 主机属性
| ipv4 | varchar(255) | YES | | NULL | IP地址
| ipv6 | varchar(255) | YES | | NULL | IP地址
| last_registration_time | bigint(20) | NO | | NULL | 最后登记时间
| os_arch | varchar(255) | NO | | NULL | 操作系统架构
| os_info | varchar(1000) | NO | | NULL |
| os_type | varchar(255) | NO | | NULL |
| ph_cpu_count | int(11) | YES | | NULL |
| public_host_name | varchar(255) | YES | | NULL | 主机名
| rack_info | varchar(255) | NO | | NULL | 机架信息
| total_mem | bigint(20) | NO | | NULL | 总内存
+------------------------+---------------+------+-----+---------+
mysql> desc clusters;
+-----------------------+--------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+-----------------------+--------------+------+-----+---------+-------+
| cluster_id | bigint(20) | NO | PRI | NULL | |
| resource_id | bigint(20) | NO | MUL | NULL | | 资源id
| upgrade_id | bigint(20) | YES | MUL | NULL | |
| cluster_info | varchar(255) | NO | | NULL | |
| cluster_name | varchar(100) | NO | UNI | NULL | | 集群名称
| provisioning_state | varchar(255) | NO | | INIT | | 配置状态
| security_type | varchar(32) | NO | | NONE | | 安全策略类型
| desired_cluster_state | varchar(255) | NO | | NULL | | 预期集群状态
| desired_stack_id | bigint(20) | NO | MUL | NULL | | 预期库id
+-----------------------+--------------+------+-----+---------+-------+
1.5.表的关联查询
-- Component status per host: every component whose current state is not
-- 'INSTALLED', together with its cluster name, host name and IPv4 address.
-- hoststate contributes no output columns; the inner join only restricts the
-- result to hosts that have a hoststate row.
SELECT
    cl.cluster_name,
    h.host_name,
    h.ipv4,
    hcs.service_name,
    hcs.current_state,
    hcs.component_name
FROM hostcomponentstate AS hcs
INNER JOIN hoststate AS hs
    ON hs.host_id = hcs.host_id
INNER JOIN hosts AS h
    ON h.host_id = hcs.host_id
INNER JOIN clusters AS cl
    ON cl.cluster_id = hcs.cluster_id
WHERE hcs.current_state != 'INSTALLED'
查询出的信息我们将同步到 SQLServer表
将 MySQL 中 Ambari 的 4 张表关联查询出的信息同步到 SQL Server;任务每 5 分钟调度一次,发现问题即告警。
xxx 代表不同客户对应的数据库标识;脚本可以读取多组数据库配置,并把数据写入不同的 SQL Server 数据库。
[user@host ~]$ cat /hadoop/datadir/script/hadoop/table.ini
[xxx_CONNECT]
url=xxx
port=1433
username=PCS.Support
password=xxx
dbname=HDP_TEST
customer=xxx_
# Trace every command as it is executed (debugging aid).
# NOTE(review): the trace will also show commands that embed the passwords
# configured in this script - confirm that is acceptable where the log is kept.
set -x

# Connection settings for the MySQL instance that holds the Ambari database.
HOSTNAME='xxx'
USER='root'
PASSWD='@001'
PORT='3306'
DBNAME='ambari'
# ReadConnect FILE SECTION KEY
#
# Looks up KEY inside the [SECTION] block of the INI file FILE and stores the
# value in the global variable ReadINI (empty when the section or key is absent).
#
#   $1  path to the INI file
#   $2  section name, without the surrounding brackets
#   $3  key name (matched exactly, as a plain string)
#
# Behaviour notes:
#   * The search stops at the next [section] header, so a key that is missing
#     from the requested section is never answered from a later section.
#   * Everything after the first '=' is returned, so values that themselves
#     contain '=' (typical for passwords) are not truncated.
#   * Section and key are handed to awk through the environment and compared as
#     plain strings, so regex metacharacters in them have no special meaning.
function ReadConnect(){
    ReadINI=$(INI_SECTION="$2" INI_KEY="$3" awk '
        # Any [header] line switches the "inside wanted section" flag on or off.
        /^[ \t]*\[/ {
            header = $0
            gsub(/^[ \t]+|[ \t\r]+$/, "", header)
            in_section = (header == "[" ENVIRON["INI_SECTION"] "]")
            next
        }
        in_section {
            eq = index($0, "=")
            if (eq > 0 && substr($0, 1, eq - 1) == ENVIRON["INI_KEY"]) {
                print substr($0, eq + 1)
                exit
            }
        }
    ' "$1")
}
# Customer prefix; it selects the "<prefix>CONNECT" section of the INI file.
batchCustomer=xxx_
table_ini=/hadoop/datadir/script/hadoop/ipvaSum/table.ini
ini_section="${batchCustomer}CONNECT"

# Read the SQL Server connection settings. ReadConnect leaves each looked-up
# value in $ReadINI, which is copied into its own variable right away.
ReadConnect "$table_ini" "$ini_section" url
server=$ReadINI
ReadConnect "$table_ini" "$ini_section" port
# NOTE(review): port is read here but is not part of sqlserver_cmd below.
port=$ReadINI
ReadConnect "$table_ini" "$ini_section" dbname
database=$ReadINI
ReadConnect "$table_ini" "$ini_section" username
user=$ReadINI
ReadConnect "$table_ini" "$ini_section" password
paw=$ReadINI

# File that receives the rows exported from MySQL.
ambari_tmp_file=/hadoop/datadir/temp/monitor/component_tmp_file.txt

# Command prefixes; callers append the SQL text as the final argument.
mysql_cmd="mysql -h${HOSTNAME} -P${PORT} -u${USER} -p${PASSWD} ${DBNAME} -e"
sqlserver_cmd="/opt/mssql-tools/bin/sqlcmd -S $server -U $user -P $paw -d ${database} -Q "

# Timestamp of this run, used for the modifyTime / createTime columns.
datebatch=$(date +'%Y-%m-%d %H:%M:%S')
# AmbariServer monitoring: count the processes whose command line mentions
# AmbariServer and record a heartbeat row in task_monitor.
# Status 90 is written when no such process is found, 80 otherwise.
ambariServerCount=$(ps -ef | grep AmbariServer | grep -v "grep" | wc -l)
if [ "$ambariServerCount" -eq 0 ]; then
    heartbeat_status=90
else
    heartbeat_status=80
fi
${sqlserver_cmd} "INSERT into task_monitor (flowId,taskId,status,startTime,endTime) VALUES(DATEDIFF(S,'1970-01-01 00:00:00', GETDATE()),'ambari-server-heartbeat',${heartbeat_status},GETDATE(),GETDATE())"
# Query component state: MySQL statement that joins hostcomponentstate with
# hoststate, hosts and clusters, keeps the rows whose current_state is not
# 'INSTALLED', and writes them comma-separated to ${ambari_tmp_file}.
# NOTE(review): INTO OUTFILE is written by the MySQL server process, so this
# presumably requires the MySQL server to run on the same host as this script,
# and the target file must not already exist - confirm.
select_ambari_sql="SELECT cluster_name , host_name , ipv4 , service_name ,a. current_state,component_name FROM hostcomponentstate a INNER JOIN hoststate b on a.host_id = b.host_id
INNER JOIN hosts c
on a.host_id = c.host_id
INNER JOIN clusters d
ON d.cluster_id = a.cluster_id
WHERE a.current_state != 'INSTALLED' into outfile \"${ambari_tmp_file}\" fields terminated by \",\" ;"
# Merge: upsert the staging table component_monitor_tmp into component_monitor,
# matching on host_name + service_name + component_name. Matched rows get
# current_state and modifyTime refreshed; unmatched rows are inserted with both
# modifyTime and createTime set to ${datebatch}.
task_merge_sql="MERGE ${database}.[dbo].[component_monitor] AS a USING (SELECT cluster_name,host_name,ipv4,service_name,current_state,component_name FROM ${database}.[dbo].[component_monitor_tmp]) AS b ON a.host_name = b.host_name AND a.service_name=b.service_name AND a.component_name = b.component_name WHEN MATCHED THEN UPDATE SET a.current_state = b.current_state ,a.modifyTime='${datebatch}' WHEN NOT MATCHED THEN INSERT (cluster_name , host_name , ipv4 , service_name ,current_state , component_name,modifyTime,createTime) VALUES(b.cluster_name , b.host_name , b.ipv4 , b.service_name ,b.current_state , b.component_name,'${datebatch}','${datebatch}');"
# Sets current_state to 'STOPED' (spelling is part of the stored value) on every
# row whose modifyTime is older than the newest modifyTime in component_monitor.
# NOTE(review): presumably these are the rows the latest merge did not touch,
# i.e. components no longer returned by the MySQL query - confirm.
update_stop_sql="UPDATE ${database}.[dbo].[component_monitor] SET current_state = 'STOPED' where modifyTime <(select max(modifyTime) from ${database}.[dbo].[component_monitor]);"
# Run the sync: export the component rows from MySQL into ${ambari_tmp_file},
# then bulk-load that file into the SQL Server staging table and apply the
# merge and stop-marking statements.
#
# The export path is ${ambari_tmp_file}. The name previously used in this
# section (azkaban_exec_tmp_file) is not defined anywhere in the script, so the
# stale export was never removed, the existence test ran with an empty operand,
# and bcp was called without an input file.

# Remove the previous export first so the MySQL export starts from a clean path.
rm -f "${ambari_tmp_file}"
${mysql_cmd} "${select_ambari_sql}"
if [ -f "${ambari_tmp_file}" ]; then
    # Staging table is emptied before each load so it only holds this run's rows.
    ${sqlserver_cmd} "truncate table ${database}.[dbo].[component_monitor_tmp]"
    /opt/mssql-tools/bin/bcp "${database}.dbo.component_monitor_tmp" in "${ambari_tmp_file}" -S"${server}" -U"${user}" -P"${paw}" -c -t, -r'\n' -b 1000
    ${sqlserver_cmd} "${task_merge_sql}"
    ${sqlserver_cmd} "${update_stop_sql}"
else
    echo file ${ambari_tmp_file} not exist!
fi