sipcapture/homer7-docker

Hom7-prom-all: Removing loki causes homer-webapp to stop displaying traces

n8twj opened this issue · 13 comments

n8twj commented

If we remove Loki, homer-webapp stops displaying traces, but grafana still updates its graphing. How do we remove Loki but keep homer-webapp displaying traces?

n8twj commented

Homer-webapp works just fine in hom7-influx-tick example, which does not run Loki

Cannot replicate any such issue. As you also know, there are examples without any Loki emitters working fine.
Please provide evidence and examples for your problem and your full configuration in order to get any suggestion.

n8twj commented

I am running hom7-prom-all directly from github with no modifications. Simply comment out the references to loki and you can easily replicate homer-webapp not updating

Hey @n8twj, the format of your last answer is quite impossible to read. Could you kindly reformat it in a more readable way?

n8twj commented
# Compose file format 2.1 — required for depends_on with healthcheck conditions.
version: '2.1'

# Named volumes so Prometheus/Grafana state survives container recreation.
volumes:
    prometheus_data: {}
    grafana_data: {}

services:
  # Metrics store; scrapes targets defined in ./prometheus/prometheus.yml
  # (including heplify-server's exporter on :9096).
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    volumes:
      - ./prometheus/:/etc/prometheus/
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=30d'
      - '--web.enable-lifecycle'
    restart: unless-stopped
    # expose publishes 9090 to linked containers only, not to the host —
    # no clash with homer-webapp's host port 9090.
    expose:
      - 9090
    labels:
      org.label-schema.group: "monitoring"
    logging:
      options:
        max-size: "50m"

  # Prometheus Alertmanager; routing rules come from ./alertmanager/config.yml.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    # NOTE: removed trailing whitespace after "volumes:" (yamllint trailing-spaces).
    volumes:
      - ./alertmanager/:/etc/alertmanager/
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
    restart: unless-stopped
    expose:
      - 9093
    labels:
      org.label-schema.group: "monitoring"
    logging:
      options:
        max-size: "50m"
    # Start-ordering only; alertmanager has no functional dependency on
    # grafana visible here — preserved as-is. TODO confirm it is intentional.
    depends_on:
      - grafana

  # Host-level metrics (CPU, disk, fs) for Prometheus; mounts host /proc,
  # /sys and / read-only, hence root + privileged.
  nodeexporter:
    image: prom/node-exporter:latest
    container_name: nodeexporter
    user: root
    privileged: true
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # "$$" is a compose-escaped literal "$" for the regex end anchor.
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
    restart: unless-stopped
    expose:
      - 9100
    labels:
      org.label-schema.group: "monitoring"
    logging:
      options:
        max-size: "50m"
    depends_on:
      - prometheus
  
  # Dashboards; anonymous viewer access + embedding enabled so homer-webapp
  # can iframe its panels. Datasources/dashboards provisioned from ./grafana/.
  grafana:
    image: grafana/grafana:master
    container_name: grafana
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning/:/etc/grafana/provisioning/
    environment:
      - GF_SECURITY_ALLOW_EMBEDDING=true
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
      - GF_AUTH_OAUTH_AUTO_LOGIN=true
      # Default credentials admin/admin unless overridden via environment.
      - GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_EXPLORE_ENABLED=true
    restart: unless-stopped
    ports:
      - "3000:3000"
    # Healthcheck gates homer-webapp startup (it waits on service_healthy).
    healthcheck:
      test: ["CMD-SHELL", "wget --quiet --tries=1 --spider http://localhost:3000/login || exit 1"]
      interval: 1s
      timeout: 3s
      retries: 30
    labels:
      org.label-schema.group: "monitoring"
    logging:
      options:
        max-size: "50m"

  #loki:
  #  image: grafana/loki
  #  container_name: loki
  #  restart: unless-stopped
  #  expose:
  #    - 3100
  #  ports:
  #    - "3100:3100"
  #  labels:
  #    org.label-schema.group: "monitoring"
  #  logging:
  #    options:
  #      max-size: "50m"

  # HEP capture server: receives HEP on 9060/udp+tcp and 9061/tcp (published
  # on host 9069/9061), writes SIP traces to postgres, exposes prometheus
  # metrics on :9096.
  heplify-server:
    image: sipcapture/heplify-server
    container_name: heplify-server
    ports:
      - "9069:9060"
      - "9069:9060/udp"
      - "9061:9061/tcp"
    command:
      - './heplify-server'
    environment:
      - "HEPLIFYSERVER_HEPADDR=0.0.0.0:9060"
      - "HEPLIFYSERVER_HEPTCPADDR=0.0.0.0:9061"
      # "DBSHEMA" (sic) is the actual variable name heplify-server reads
      # (config field DBShema) — do not "correct" the spelling.
      - "HEPLIFYSERVER_DBSHEMA=homer7"
      - "HEPLIFYSERVER_DBDRIVER=postgres"
      - "HEPLIFYSERVER_DBADDR=db:5432"
      - "HEPLIFYSERVER_DBUSER=db"
      - "HEPLIFYSERVER_DBPASS=homerSeven"
      - "HEPLIFYSERVER_DBDATATABLE=homer_data"
      - "HEPLIFYSERVER_DBCONFTABLE=homer_config"
      - "HEPLIFYSERVER_DBROTATE=true"
      - "HEPLIFYSERVER_DBDROPDAYS=1"
      - "HEPLIFYSERVER_DBDROPDAYSCALL=1"
      - "HEPLIFYSERVER_DBDROPDAYSREGISTER=1"
      - "HEPLIFYSERVER_DBDROPDAYSDEFAULT=1"
      - "HEPLIFYSERVER_DBDROPONSTART=true"
#      - "HEPLIFYSERVER_DBUSAGEPROTECTION=true"
      - "HEPLIFYSERVER_DBUSAGESCHEME=percentage"
      - "HEPLIFYSERVER_DBPERCENTAGEUSAGE=50%"
      - "HEPLIFYSERVER_LOGLVL=info"
      - "HEPLIFYSERVER_LOGSTD=true"
      - "HEPLIFYSERVER_PROMADDR=0.0.0.0:9096"
#      - "HEPLIFYSERVER_PROMTARGETIP=127.0.0.1"
#      - "HEPLIFYSERVER_PROMTARGETNAME=local"
      - "HEPLIFYSERVER_DEDUP=false"
#      - "HEPLIFYSERVER_LOKIURL=http://loki:3100/api/prom/push"
#      - "HEPLIFYSERVER_LOKITIMER=2"
    restart: unless-stopped
    # FIX: a plain "- db" entry only orders startup; heplify-server could
    # start before postgres accepted connections ("dial tcp ... connection
    # refused" in the reported logs), so its initial table creation failed
    # and no traces were stored. Compose 2.1 + db's healthcheck let us wait
    # until postgres actually answers queries.
    depends_on:
      db:
        condition: service_healthy
    expose:
      - 9090
      - 9096
    labels:
      org.label-schema.group: "monitoring"
    logging:
      options:
        max-size: "50m"

  # HOMER UI; reads traces from postgres (DB_HOST) and metrics via
  # Prometheus (PROM_HOST). LOKI_HOST is optional and commented out here.
  homer-webapp:
    container_name: homer-webapp
    image: sipcapture/webapp
    environment:
#      - "LOKI_HOST=loki"
      - "PROM_HOST=prometheus"
      - "DB_HOST=db"
      - "DB_USER=db"
      - "DB_PASS=homerSeven"
    restart: unless-stopped
    ports:
      - "9090:80"
    volumes:
      - ./bootstrap:/app/bootstrap
    logging:
      options:
        max-size: "50m"
    # Waits for both healthchecks, so the webapp never races db/grafana.
    depends_on:
      db:
        condition: service_healthy
      grafana:
        condition: service_healthy

  # PostgreSQL backing store for HOMER config and captured HEP data.
  db:
    container_name: db
    image: postgres:11-alpine
    environment:
      POSTGRES_PASSWORD: homerSeven
      POSTGRES_USER: db
    expose:
      - 5432
    # FIX: the service previously declared "restart" twice ("always" and
    # "unless-stopped") — duplicate keys are invalid YAML and most parsers
    # silently keep the last value. Kept the effective one.
    restart: unless-stopped
    volumes:
      - ./init-user-db.sh:/docker-entrypoint-initdb.d/init-user-db.sh
      - ./postgres-data:/var/lib/postgresql/data
    # Healthy once psql can run a query; consumed by dependents'
    # "condition: service_healthy" gates.
    healthcheck:
      test: ["CMD-SHELL", "psql -h 'localhost' -U 'db' -c '\\l'"]
      interval: 1s
      timeout: 3s
      retries: 30
    logging:
      options:
        max-size: "50m"
    # Start-ordering only; the database has no functional dependency on
    # grafana — preserved as-is. TODO confirm it is intentional.
    depends_on:
      - grafana
n8twj commented

git clone https://github.com/sipcapture/homer7-docker.git && cd homer7-docker/heplify-server/hom7-prom-all
(note: git clone cannot take a /tree/... browser URL — clone the repository root and change into the example directory)
comment out all of the references to Loki
docker-compose up -d
send HEP traffic
experience for yourself.

If we remove Loki, homer-webapp stops displaying traces

You need to dig deeper and report meaningful details. Does heplify-server stop sending HEP? Does it send it and it's not stored? Follow the logical path. If there's data and queries are failing, please paste the response from the API as you experience this error, or any evidence of any error really, such as the logs from the service containers you believe to be failing. Loki is not required by anything functionally speaking, so the root cause is probably related to a different parameter entirely.

but grafana still updates its graphing

There is no relation between Loki and the metrics scraped by prometheus

n8twj commented

If grafana is still happily updating the graphs, wouldn't that suggest HEP data is still somehow getting through the system?

2022/10/05 13:15:39.517153 server.go:297: INFO stats since last 5 minutes. PPS: 1188, HEP: 356403, Filtered: 0, Error: 36
2022/10/05 13:40:39.517057 server.go:297: INFO stats since last 5 minutes. PPS: 1650, HEP: 495231, Filtered: 0, Error: 0

n8twj commented

I am very much turning up an EC2 instance, installing docker/docker-compose, running the examples provided and sending HEP data from our kamailio instances. No other configuration changes anywhere. If I simply comment out the LOKI references in docker-compose.yml homer-webapp no longer displays traces.

n8twj commented

homer-webapp logs simply contain this:


Pre-Flight provisioning completed!
 
CONNECT to DB ROOT STRING: [host=db user=db dbname=postgres sslmode=disable port=5432 password=homerSeven]
 
 
HOMER - create db [homer_config] with [name=db] 

(/homer-app/migration/migration.go:85) 
[2022-10-04 19:00:41]  pq: database "homer_config" already exists 

(/homer-app/migration/migration.go:85) 
[2022-10-04 19:00:41]  [0.56ms]  CREATE DATABASE homer_config OWNER db  
[0 rows affected or returned ] 
 
DONE 
 
HOMER - show users 
	Role name	|	Attributes
------------------------------------------------
	db	|	superuser, create database	
 
DONE 
 
CONNECT to DB ROOT STRING: [host=db user=db dbname=postgres sslmode=disable port=5432 password=homerSeven]
 
 
HOMER - create db [homer_data] with [name=db] 

(/homer-app/migration/migration.go:85) 
[2022-10-04 19:00:42]  [242.84ms]  CREATE DATABASE homer_data OWNER db  
[0 rows affected or returned ] 
 
DONE 
 
HOMER - show users 
	Role name	|	Attributes
------------------------------------------------
	db	|	superuser, create database	
 
DONE 
 
HOMER - creating tables for the config DB [dbname=homer_config] 
 
DONE 
 
HOMER - filling tables for the config DB [dbname=homer_config] 
 reinstalling users 
 reinstalling global_settings 
 reinstalling auth_token 
 reinstalling agent_location_session 
 reinstalling hepsub_mapping_schema 
 reinstalling user_settings 
 reinstalling mapping_schema 
 reinstalling versions 
 
DONE 
Database provisioning completed!
Successful ping: db, Type: data, Node: LocalNode 
PostgreSQL version: 11.12
 
 
         ___              
        /__/\             
        \  \:\           
         \__\:\  
     ___ /  /::\     
    /__/\  /:/\:\     _____ _____    
    \  \:\/:/__\/    |___  |___  |   
     \  \::/            / /   / / 
      \  \:\           / /   / /          
       \  \:\         /_(_) /_/               
        \__\/         

 
 Version: homer-app 1.3.22 
⇨ http server started on [::]:80
Successful ping: db, Type: data, Node: LocalNodeSuccessful ping: db,
LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, 
Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: LocalNodeSuccessful ping: db, Type: data, Node: 

n8twj commented

$ docker logs heplify-server

2022/10/05 14:25:39.517118 server.go:297: INFO stats since last 5 minutes. PPS: 29, HEP: 8864, Filtered: 0, Error: 0 2022/10/05 14:30:39.517067 server.go:297: INFO stats since last 5 minutes. PPS: 1091, HEP: 327511, Filtered: 0, Error: 0 2022/10/05 14:35:39.517056 server.go:297: INFO stats since last 5 minutes. PPS: 1863, HEP: 558966, Filtered: 0, Error: 0

Lots of info but you are yet to show us what's failing?

If there's data and queries are failing, please paste the response from the API as you experience this error, or any evidence of any error really, such as the logs from the service containers you believe to be failing

n8twj commented

I presume either my commenting out loki (somehow) or the depends-on db requirement isn't always working as expected. If I restart heplify-server separately after the rest of the stack is up, we now get back to my other issue of hard drive space getting burnt up :(


[ec2-user@ip-172-31-42-201 hom7-prom-all]$ docker logs heplify-server
Could not find toml config file, use flag defaults. <nil>
2022/10/05 15:38:37.640224 server.go:93: INFO start heplify-server 1.57 with config.HeplifyServer{HEPAddr:"0.0.0.0:9060", HEPTCPAddr:"0.0.0.0:9061", HEPTLSAddr:"", HEPWSAddr:"", ESAddr:"", ESDiscovery:true, HEPv2Enable:true, ESUser:"", ESPass:"", LokiURL:"", LokiBulk:400, LokiTimer:4, LokiBuffer:100000, LokiHEPFilter:[]int{1, 5, 100}, ForceHEPPayload:[]int(nil), PromAddr:"0.0.0.0:9096", PromTargetIP:"", PromTargetName:"", DBShema:"homer7", DBDriver:"postgres", DBAddr:"db:5432", DBSSLMode:"disable", DBUser:"db", DBPass:"<private>", DBDataTable:"homer_data", DBConfTable:"homer_config", DBBulk:400, DBTimer:4, DBBuffer:400000, DBWorker:8, DBRotate:true, DBPartLog:"2h", DBPartIsup:"6h", DBPartSip:"2h", DBPartQos:"6h", DBDropDays:1, DBDropDaysCall:1, DBDropDaysRegister:1, DBDropDaysDefault:1, DBDropOnStart:true, DBUsageProtection:false, DBUsageScheme:"percentage", DBPercentageUsage:"50%", DBMaxSize:"20GB", DBProcDropLimit:2, Dedup:false, DiscardMethod:[]string(nil), CensorMethod:[]string(nil), AlegIDs:[]string(nil), ForceALegID:false, CustomHeader:[]string(nil), IgnoreCaseCH:false, SIPHeader:[]string{"ruri_user", "ruri_domain", "from_user", "from_tag", "to_user", "callid", "cseq", "method", "user_agent"}, LogDbg:"", LogLvl:"info", LogStd:true, LogSys:false, Config:"./heplify-server.toml", ConfigHTTPAddr:"", ConfigHTTPPW:"", Version:false, ScriptEnable:false, ScriptEngine:"lua", ScriptFolder:"", ScriptHEPFilter:[]int{1, 5, 100}, TLSCertFolder:".", TLSMinVersion:"1.2"}
2022/10/05 15:38:37.640840 prometheus.go:38: INFO expose metrics without or unbalanced targets
2022/10/05 15:38:37.641867 rotator.go:539: INFO start creating tables (2022-10-05 15:38:37.641839347 +0000 UTC m=+0.013587301)
2022/10/05 15:38:37.643229 rotator.go:544: ERR dial tcp 192.168.240.4:5432: connect: connection refused
2022/10/05 15:38:37.649130 rotator.go:547: ERR dial tcp 192.168.240.4:5432: connect: connection refused
2022/10/05 15:38:37.649734 rotator.go:550: ERR dial tcp 192.168.240.4:5432: connect: connection refused
2022/10/05 15:38:37.649823 rotator.go:552: INFO end creating tables (2022-10-05 15:38:37.649785395 +0000 UTC m=+0.021533329)
2022/10/05 15:38:37.650301 rotator.go:555: ERR dial tcp 192.168.240.4:5432: connect: connection refused
2022/10/05 15:38:37.650779 server.go:152: ERR dial tcp 192.168.240.4:5432: connect: connection refused
2022/10/05 15:42:08.049198 server.go:255: WARN overflowing db channel, please adjust DBWorker or DBBuffer setting
2022/10/05 15:42:08.049287 server.go:255: WARN overflowing db channel, please adjust DBWorker or DBBuffer setting
2022/10/05 15:42:08.049454 server.go:255: WARN overflowing db channel, please adjust DBWorker or DBBuffer setting
2022/10/05 15:42:08.049887 server.go:255: WARN overflowing db channel, please adjust DBWorker or DBBuffer setting
2022/10/05 15:42:18.716084 heplify-server.go:102: INFO stopping heplify-server...
2022/10/05 15:42:18.716183 udp.go:27: INFO stopping UDP listener on [::]:9060
2022/10/05 15:42:19.663318 tcp.go:33: INFO stopping TCP listener on [::]:9061
2022/10/05 15:42:19.663416 database.go:75: INFO close postgres channel
2022/10/05 15:42:19.663446 metric.go:69: INFO close metric channel
2022/10/05 15:42:19.663457 heplify-server.go:111: INFO heplify-server has been stopped
Could not find toml config file, use flag defaults. <nil>
2022/10/05 15:42:20.986331 server.go:93: INFO start heplify-server 1.57 with config.HeplifyServer{HEPAddr:"0.0.0.0:9060", HEPTCPAddr:"0.0.0.0:9061", HEPTLSAddr:"", HEPWSAddr:"", ESAddr:"", ESDiscovery:true, HEPv2Enable:true, ESUser:"", ESPass:"", LokiURL:"", LokiBulk:400, LokiTimer:4, LokiBuffer:100000, LokiHEPFilter:[]int{1, 5, 100}, ForceHEPPayload:[]int(nil), PromAddr:"0.0.0.0:9096", PromTargetIP:"", PromTargetName:"", DBShema:"homer7", DBDriver:"postgres", DBAddr:"db:5432", DBSSLMode:"disable", DBUser:"db", DBPass:"<private>", DBDataTable:"homer_data", DBConfTable:"homer_config", DBBulk:400, DBTimer:4, DBBuffer:400000, DBWorker:8, DBRotate:true, DBPartLog:"2h", DBPartIsup:"6h", DBPartSip:"2h", DBPartQos:"6h", DBDropDays:1, DBDropDaysCall:1, DBDropDaysRegister:1, DBDropDaysDefault:1, DBDropOnStart:true, DBUsageProtection:false, DBUsageScheme:"percentage", DBPercentageUsage:"50%", DBMaxSize:"20GB", DBProcDropLimit:2, Dedup:false, DiscardMethod:[]string(nil), CensorMethod:[]string(nil), AlegIDs:[]string(nil), ForceALegID:false, CustomHeader:[]string(nil), IgnoreCaseCH:false, SIPHeader:[]string{"ruri_user", "ruri_domain", "from_user", "from_tag", "to_user", "callid", "cseq", "method", "user_agent"}, LogDbg:"", LogLvl:"info", LogStd:true, LogSys:false, Config:"./heplify-server.toml", ConfigHTTPAddr:"", ConfigHTTPPW:"", Version:false, ScriptEnable:false, ScriptEngine:"lua", ScriptFolder:"", ScriptHEPFilter:[]int{1, 5, 100}, TLSCertFolder:".", TLSMinVersion:"1.2"}
2022/10/05 15:42:20.986669 prometheus.go:38: INFO expose metrics without or unbalanced targets
2022/10/05 15:42:20.986804 rotator.go:539: INFO start creating tables (2022-10-05 15:42:20.986792954 +0000 UTC m=+0.012423620)
2022/10/05 15:42:26.592790 rotator.go:552: INFO end creating tables (2022-10-05 15:42:26.592770633 +0000 UTC m=+5.618401296)
2022/10/05 15:42:26.847043 postgres.go:61: INFO postgres connection established
2022/10/05 15:47:20.987367 server.go:297: INFO stats since last 5 minutes. PPS: 1974, HEP: 592461, Filtered: 0, Error: 0