Log Stack
- Grafana
- Clickhouse
- Vector
info
本案例演示如何收集 syslog 日志
Vector
数据收集
# syslog-lan.yaml
# Vector pipeline: receive remote syslog over UDP, parse each event with VRL,
# then write the result to the console and to ClickHouse.
data_dir: "/var/lib/vector"

# Remote syslog input
sources:
  remote_udp_syslog_1:
    type: syslog
    address: 0.0.0.0:514
    mode: udp

# Parse syslog logs
# See the Vector Remap Language reference for more info: https://vrl.dev
transforms:
  parse_logs_1:
    type: remap
    metric_tag_values: "full"
    inputs:
      - remote_udp_syslog_1
    # Flow:
    #   1. try the standard syslog parser (parse_syslog)
    #   2. if that fails, try the common log parser (parse_common_log)
    #   3. if that fails too, keep the event unchanged
    # A literal block scalar (|-) keeps every line break of the VRL program.
    # A folded scalar (>-) joins same-indent lines with spaces, which would
    # fuse the assignment and the merge into one invalid statement.
    source: |-
      structured =
        parse_syslog(.message) ??
        parse_common_log(.message) ??
        .
      . = merge(., structured)

# Sinks
sinks:
  # Print events to the console
  console:
    type: console
    encoding:
      # json makes the structure produced by the remap transform visible
      codec: json
    inputs:
      - parse_logs_1
  # Write events to ClickHouse
  ch:
    type: clickhouse
    inputs:
      - parse_logs_1
    # The HTTP endpoint is the recommended one
    endpoint: http://192.168.10.2:8123
    format: json_each_row
    database: monitors
    table: syslogs
    compression: gzip
    skip_unknown_fields: true
    date_time_best_effort: true
    auth:
      strategy: basic
      # NOTE(review): sample credentials in plain text; replace before real use
      user: clickhouse
      password: clickhouse
启动Vector:
# Load every config file in the directory; exit if a sink health check fails
./vector --config-dir /opt/vector/config/lan --require-healthy=true
Clickhouse
数据存储层,当然你也可以换其他的,但是对于 Grafana 的查询语句需要自行做兼容处理
parse_logs 阶段产生的日志结构是相对固定的,而且clickhouse的表结构也是相对固定的,需要自行创建合理的结构
-- Storage table for the syslog events written by the Vector `ch` sink.
-- The sorting key leads with `timestamp` because every dashboard query filters
-- on a time range; a random UUID as the leading key would leave the sparse
-- primary index unable to skip any granules for those filters.
CREATE TABLE monitors.syslogs (
    `id`          UUID DEFAULT generateUUIDv4(),
    `timestamp`   DateTime64(3),
    `appname`     LowCardinality(String),
    `facility`    LowCardinality(String),
    `host`        LowCardinality(String),
    `hostname`    LowCardinality(String),
    `message`     String,
    `procid`      UInt32,
    `severity`    LowCardinality(String),
    `source_ip`   IPv4,
    `source_type` LowCardinality(String),
    -- second-precision copy of `timestamp`, used only for monthly partitioning
    `_date`       DateTime DEFAULT toDateTime(timestamp)
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(_date)
PRIMARY KEY (timestamp)
-- `id` stays last in the sorting key as a tie-breaker for identical timestamps
ORDER BY (timestamp, hostname, severity, id)
SETTINGS index_granularity = 8192
Grafana
数据展示层,其实也是最难配置的一层
初始Account是:admin/grafana
配置一个BarChart:
-- Per-minute number of log lines, one output column per severity
-- (wide format: a time column followed by one value column per series).
--
-- The time-range filter lives in a subquery on purpose: the outer SELECT
-- aliases the minute bucket as `timestamp`, and ClickHouse resolves a name in
-- WHERE to a same-named SELECT alias by default, so a filter placed next to
-- that alias would compare the truncated minute instead of the raw column.
SELECT
    toStartOfMinute(timestamp)    AS timestamp,  -- minute bucket
    countIf(severity = 'info')    AS info,       -- number of 'info' lines
    countIf(severity = 'notice')  AS notice,     -- number of 'notice' lines
    countIf(severity = 'warning') AS warning,    -- number of 'warning' lines
    countIf(severity = 'crit')    AS crit,       -- number of 'crit' lines
    countIf(severity = 'err')     AS err         -- number of 'err' lines
FROM (
    SELECT
        timestamp,
        severity
    FROM monitors.syslogs
    WHERE timestamp >= $__fromTime
      AND timestamp <= $__toTime
)
GROUP BY timestamp  -- group by minute bucket
ORDER BY timestamp  -- oldest bucket first
上面的查询是按以下思路推导出来的:
根据 Grafana 官方文档,BarChart 需要类别为列:https://grafana.com/docs/grafana/latest/visualizations/panels-visualizations/visualizations/bar-chart/#supported-data-formats
大概是这样的结构:
| Group | Value1 | Value2 | Value3 |
|---|---|---|---|
| uno | 1 | 2 | 3 |
| abc | 2 | 3 | 5 |
-- List every severity value present in the table.
-- The table is database-qualified like the other queries, so the result does
-- not depend on the connection's default database.
SELECT DISTINCT severity
FROM monitors.syslogs;
-- Number of log lines per minute and severity (long format), newest first.
--
-- The time-range filter is applied in a subquery: the outer SELECT reuses the
-- name `timestamp` for the minute bucket, and ClickHouse resolves a name in
-- WHERE to a same-named alias by default, which would filter on the truncated
-- minute rather than on the raw column.
SELECT
    toStartOfMinute(timestamp) AS timestamp,
    severity,
    count() AS count
FROM (
    SELECT
        timestamp,
        severity
    FROM monitors.syslogs
    WHERE timestamp >= $__fromTime
      AND timestamp <= $__toTime
)
GROUP BY timestamp, severity
ORDER BY timestamp DESC
LIMIT 1000;
配置Logs显示:
-- Rows for the Grafana Logs panel: newest first, capped at 1000 rows.
SELECT
    timestamp,
    message  AS "body",
    severity AS "level",
    hostname AS "labels"
FROM "monitors"."syslogs"
WHERE timestamp >= $__fromTime
  AND timestamp <= $__toTime
ORDER BY timestamp DESC
LIMIT 1000
可直接导入的 Json Model:
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": false,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 0,
"links": [],
"panels": [
{
"datasource": {
"type": "grafana-clickhouse-datasource",
"uid": "ef4lswms59mo0c"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisWidth": -6,
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 0,
"scaleDistribution": {
"type": "linear"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 0
},
"id": 3,
"options": {
"barRadius": 0.1,
"barWidth": 0.64,
"fullHighlight": false,
"groupWidth": 0.7,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"orientation": "auto",
"showValue": "auto",
"stacking": "normal",
"text": {},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
},
"xField": "timestamp",
"xTickLabelRotation": 0,
"xTickLabelSpacing": 100
},
"pluginVersion": "12.2.2",
"targets": [
{
"builderOptions": {
"columns": [],
"database": "default",
"limit": 1000,
"meta": {},
"mode": "list",
"queryType": "table",
"table": ""
},
"datasource": {
"type": "grafana-clickhouse-datasource",
"uid": "ef4lswms59mo0c"
},
"editorType": "sql",
"format": 1,
"hide": false,
"meta": {
"builderOptions": {
"columns": [],
"database": "default",
"limit": 1000,
"meta": {},
"mode": "list",
"queryType": "table",
"table": ""
}
},
"pluginVersion": "4.11.2",
"queryType": "table",
"rawSql": "SELECT \n toStartOfMinute(timestamp) AS timestamp, -- 按分钟分组时间戳\n SUM(severity = 'info') AS info, -- 统计 'info' 的数量\n SUM(severity = 'notice') AS notice, -- 统计 'notice' 的数量\n SUM(severity = 'warning') AS warning, -- 统计 'warning' 的数量\n SUM(severity = 'crit') AS crit, -- 统计 'crit' 的数量\n SUM(severity = 'err') AS err -- 统计 'err' 的数量\nFROM monitors.syslogs\nWHERE ( timestamp >= $__fromTime AND timestamp <= $__toTime ) \nGROUP BY timestamp -- 按分钟分组\nORDER BY timestamp -- 按时间排序",
"refId": "A"
}
],
"title": "New panel",
"type": "barchart"
},
{
"datasource": {
"type": "grafana-clickhouse-datasource",
"uid": "ef4lswms59mo0c"
},
"fieldConfig": {
"defaults": {},
"overrides": []
},
"gridPos": {
"h": 17,
"w": 24,
"x": 0,
"y": 6
},
"id": 1,
"options": {
"dedupStrategy": "exact",
"enableInfiniteScrolling": true,
"enableLogDetails": true,
"prettifyLogMessage": true,
"showCommonLabels": false,
"showLabels": false,
"showTime": true,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"pluginVersion": "12.2.2",
"targets": [
{
"builderOptions": {
"columns": [
{
"hint": "time",
"name": "timestamp",
"type": "DateTime64(3)"
},
{
"alias": "severity",
"hint": "log_level",
"name": "severity",
"type": "LowCardinality(String)"
},
{
"alias": "message",
"hint": "log_message",
"name": "message",
"type": "String"
},
{
"alias": "appname",
"hint": "log_labels",
"name": "appname",
"type": "LowCardinality(String)"
}
],
"database": "monitors",
"filters": [
{
"condition": "AND",
"filterType": "custom",
"hint": "time",
"key": "",
"operator": "WITH IN DASHBOARD TIME RANGE",
"type": "datetime"
},
{
"condition": "AND",
"filterType": "custom",
"hint": "log_level",
"key": "",
"operator": "IS ANYTHING",
"type": "string"
}
],
"limit": 1000,
"meta": {
"logMessageLike": "",
"otelVersion": "latest"
},
"mode": "list",
"orderBy": [
{
"default": true,
"dir": "DESC",
"hint": "time",
"name": ""
}
],
"queryType": "logs",
"table": "syslogs"
},
"datasource": {
"type": "grafana-clickhouse-datasource",
"uid": "ef4lswms59mo0c"
},
"editorType": "sql",
"format": 2,
"meta": {
"builderOptions": {
"columns": [
{
"hint": "time",
"name": "timestamp",
"type": "DateTime64(3)"
},
{
"alias": "severity",
"hint": "log_level",
"name": "severity",
"type": "LowCardinality(String)"
},
{
"alias": "message",
"hint": "log_message",
"name": "message",
"type": "String"
},
{
"alias": "appname",
"hint": "log_labels",
"name": "appname",
"type": "LowCardinality(String)"
}
],
"database": "monitors",
"filters": [
{
"condition": "AND",
"filterType": "custom",
"hint": "time",
"key": "",
"operator": "WITH IN DASHBOARD TIME RANGE",
"type": "datetime"
},
{
"condition": "AND",
"filterType": "custom",
"hint": "log_level",
"key": "",
"operator": "IS ANYTHING",
"type": "string"
}
],
"limit": 1000,
"meta": {
"logMessageLike": "",
"otelVersion": "latest"
},
"mode": "list",
"orderBy": [
{
"default": true,
"dir": "DESC",
"hint": "time",
"name": ""
}
],
"queryType": "logs",
"table": "syslogs"
}
},
"pluginVersion": "4.11.2",
"queryType": "logs",
"rawSql": "SELECT timestamp, message as \"body\", severity as \"level\", hostname as \"labels\" FROM \"monitors\".\"syslogs\" WHERE ( timestamp >= $__fromTime AND timestamp <= $__toTime ) ORDER BY timestamp DESC LIMIT 1000",
"refId": "A"
}
],
"title": "All Sys Logs",
"type": "logs"
}
],
"preload": false,
"refresh": "auto",
"schemaVersion": 42,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-2h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "SysLogs",
"uid": "ad7ghk2",
"version": 17
}
Troubleshooting
Caddy 输出到syslog的日志是json的
改 Caddy 配置:
# Global options
{
    # Write log lines in the console format instead of the JSON format
    log {
        format console
    }
}

# LuCI Dashboard
:80 {
    # root * /
    # file_server browse
    reverse_proxy :800
}
Grafana 显示的 timestamp 时间不对
这需要检查每个节点的时区是否都设置为本地时区。在本例中,运行 Vector 的节点时区是 UTC,导致有一半日志的时间戳是错误的。
在 Alpine Linux 可以用 setup-timezone 来更正时区。