[{"timezone":"CEST","title":"#Maintenance at #Grenoble from 2026-05-28@09:30 to 2026-05-28@12:00 : site services maintenance, frontend reboot","id":18296,"reporter":"Clément Parisot","assigned_to":"Support Staff","updated":1779965323,"created":1779198931,"priority":"P?","severity":"high: should be fixed in next month","component":"@Grenoble","resolution":"FIXED","status":"RESOLVED","start_date":1779953400,"end_date":1779962400,"tags":["maintenance","grenoble"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18296","content":"update of service VMs, site will be unavailable\nmaintenance starts now\nmaintenance is over"},{"timezone":"CEST","title":"#Maintenance at #Nantes from 2026-05-28@09:00:00 to 2026-05-28@12:00:00 : site unavailable - software updates","id":18311,"reporter":"Alexis Bitaillou","assigned_to":"Alexis Bitaillou","updated":1779964771,"created":1779799911,"priority":"P?","severity":"high: should be fixed in next month","component":"@Nantes","resolution":"FIXED","status":"RESOLVED","start_date":1779951600,"end_date":1779962400,"tags":["maintenance","nantes"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18311","content":"\nMàj OS VM, srv-[3-4, data2], pve-[1-2]\nMàn trixie dhcp, srv-data2, kavlan-[1-3], nat."},{"timezone":"CEST","title":"#Maintenance at #Lille from 2026-05-28@08:00 to 2026-05-28@17:00 : Site unavailable","id":18309,"reporter":"Hugo Dominois","assigned_to":"Support Staff","updated":1779950420,"created":1779798280,"priority":"P?","severity":"?: severity unknown, to be decided","component":"@Lille","resolution":"","status":"ASSIGNED","start_date":1779948000,"end_date":1779980400,"tags":["maintenance","lille"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18309","content":"Multiple migration of VMs to trixie"},{"timezone":"CEST","title":"#Incident at #Lyon from 2026-05-20@16:30 : electrical outage has damage the NFS server","id":18302,"reporter":"Laurent Pouilloux","assigned_to":"Support 
Staff","updated":1779887467,"created":1779350012,"priority":"P?","severity":"?: severity unknown, to be decided","component":"@Lyon","resolution":"FIXED","status":"RESOLVED","start_date":1779287400,"end_date":1779887467,"tags":["incident","lyon"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18302","content":"Lyon site will be unavailable until the NFS server is fixed.\nHPE support came this morning with a replacement motherboard, but sadly they take the wrong component. \n\nOperation will be tried again next tuesday. Site will remain down for the week-end.\nThe NFS server has been repaired yesterday.\n\nIt's a miracle, all disks are working.\nNo data has been lost."},{"timezone":"CEST","title":"#Maintenance at #Rennes from 2026-06-04@08:00 to 2026-06-04@12:00 : Site unavailable","id":18306,"reporter":"Noah Mandler","assigned_to":"Support Staff","updated":1779783170,"created":1779783170,"priority":"P?","severity":"?: severity unknown, to be decided","component":"@Rennes","resolution":"","status":"NEW","start_date":1780552800,"end_date":1780567200,"tags":["maintenance","rennes"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18306","content":"Rectification du bonding sur pve-2/3\n\nPotentiels needrestarts d'ici-là."},{"timezone":"CEST","title":"#Network #Maintenance #Renater between #Paris and #nancy from 2026-05-23@23:00 to 2026-05-24@06:00 : Renater link is down between Paris and Nancy","id":18236,"reporter":"Nathan Paulin","assigned_to":"Support Staff","updated":1779775345,"created":1777030300,"priority":"P?","severity":"?: severity unknown, to be decided","component":"Backbone network","resolution":"FIXED","status":"RESOLVED","start_date":1779570000,"end_date":1779595200,"tags":["network","maintenance","renater","paris","nancy"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18236","content":"----------------------------------------------------------------------------\nN°Ticket                                : 
169156\nType de ticket                                : MAINTENANCE\nEtat du ticket                                : Ouvert\n----------------------------------------------------------------------------\nEmetteur                                : NOC-RENATER\nElément concerné                        : Liaisons RENATER\nLocalisation de la Maintenance                : REIMS-NANCY\nImpact sur le service                        : MAJEUR\nService(s) impacté(s)                        :\n----------------------------------------------------------------------------\nDébut de Maintenance                        : 23/05/2026 23:00 CET/CEST\nFin prévue de la Maintenance                : 24/05/2026 06:00 CET/CEST\nDurée de l'impact (minutes)                : 0\n----------------------------------------------------------------------------\nDate/Heure Ouverture (du ticket)        : 24/04/2026 11:21 CET/CEST\n----------------------------------------------------------------------------\n\nDescription de la maintenance :\nMaintenance avec impact sur la liaison REIMS - NANCY\n\nCette maintenance provoquera une coupure sur les services suivants :\n\n100G-OTN-Reims[PE] Nancy[PE]-IP\n10G-OTN-Nancy[PE] Paris1[PE]--GRID5K\n10G-OTN-Reims[PE] Nancy[PE]\n\n\n----------------------------------------------------------------------------\nNOC-RENATER   Tél. 
: 0800 77 47 95 (+33 800 77 47 95)\nEmail : noc-renater@noc.renater.fr\n----------------------------------------------------------------------------\n---------------------------------------------------------------------------\nN°Ticket                                : 169156\nType de ticket                                : MAINTENANCE\nEtat du ticket                                : Fermé\n----------------------------------------------------------------------------\nEmetteur                                : NOC-RENATER\nElément concerné                        : Liaisons RENATER\nLocalisation de la Maintenance                : REIMS-NANCY\nImpact sur le service                        : MAJEUR\nService(s) impacté(s)                        :\n----------------------------------------------------------------------------\nDébut de Maintenance                        : 23/05/2026 23:00 CET/CEST\nFin prévue de la Maintenance                : 24/05/2026 06:00 CET/CEST\nDurée de l'impact (minutes)                : 216\n----------------------------------------------------------------------------\nDate/Heure Ouverture (du ticket)        : 24/04/2026 11:21 CET/CEST\nDate/Heure Fermeture (du ticket)        : 24/05/2026 03:44 CET/CEST\n----------------------------------------------------------------------------\n\nDescription de la maintenance :\nMaintenance avec impact sur la liaison REIMS - NANCY\n\nCette maintenance provoquera une coupure sur les services suivants :\n\n100G-OTN-Reims[PE] Nancy[PE]-IP\n10G-OTN-Nancy[PE] Paris1[PE]--GRID5K\n10G-OTN-Reims[PE] Nancy[PE]\n\nInformation :\nMaintenance terminée.\n\n----------------------------------------------------------------------------\nNOC-RENATER   Tél. 
: 0800 77 47 95 (+33 800 77 47 95)\nEmail : noc-renater@noc.renater.fr\n----------------------------------------------------------------------------"},{"timezone":"CEST","title":"#Maintenance at #Rennes from 2026-05-21@08:00 to 2026-05-21@17:00 : Site unavailable","id":18248,"reporter":"Noah Mandler","assigned_to":"Support Staff","updated":1779440924,"created":1777896961,"priority":"P?","severity":"?: severity unknown, to be decided","component":"@Rennes","resolution":"FIXED","status":"RESOLVED","start_date":1779343200,"end_date":1779375600,"tags":["maintenance","rennes"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18248","content":""},{"timezone":"CEST","title":"#Maintenance at #Toulouse from 2026-05-21@08:00 to 2026-05-21@18:00 : site services maintenance","id":18288,"reporter":"Julien Lelaurain","assigned_to":"Support Staff","updated":1779370304,"created":1779091502,"priority":"P?","severity":"?: severity unknown, to be decided","component":"@Toulouse","resolution":"FIXED","status":"RESOLVED","start_date":1779343200,"end_date":1779379200,"tags":["maintenance","toulouse"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18288","content":"- Test de coupure du lien backbone suite à la mise à jour de nat.toulouse en trixie\n\nSi possible :\n- Activer le bonding sur les pve ?\n- Passer en trixie les machines dhcp et nfs ?\n* Màj OS VMs\n* Màn trixie pve-[1-2], srv-data, dhcp."},{"timezone":"CEST","title":"#Maintenance at #Lille from 2026-05-21@08:00 to 2026-05-21@17:00 : Site unavailable","id":18295,"reporter":"Hugo Dominois","assigned_to":"Hugo Dominois","updated":1779358678,"created":1779196240,"priority":"P?","severity":"?: severity unknown, to be decided","component":"@Lille","resolution":"FIXED","status":"RESOLVED","start_date":1779343200,"end_date":1779375600,"tags":["maintenance","lille"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18295","content":""},{"timezone":"CEST","title":"#Maintenance at #Sophia from 
2026-05-21@08:00 to 2026-05-21@12:00 : Site unavailable - Needrestart","id":18271,"reporter":"Lancelot Doan","assigned_to":"Lancelot Doan","updated":1779358593,"created":1778480413,"priority":"P?","severity":"?: severity unknown, to be decided","component":"@Sophia","resolution":"FIXED","status":"RESOLVED","start_date":1779343200,"end_date":1779357600,"tags":["maintenance","sophia"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18271","content":""},{"timezone":"CEST","title":"#Maintenance at #ALL #SITES from 2026-05-21@08:30 to 2026-05-21@09:00 : jupyterhub, docker-cache, g5kfw, packages and UMS will be unavailable","id":18294,"reporter":"Hugo Dominois","assigned_to":"Hugo Dominois","updated":1779354226,"created":1779196197,"priority":"P?","severity":"?: severity unknown, to be decided","component":"@All sites","resolution":"FIXED","status":"RESOLVED","start_date":1779345000,"end_date":1779346800,"tags":["maintenance","all","sites"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18294","content":""},{"timezone":"CEST","title":"#Maintenance at #Louvain from 2026-05-20@08:00 to 2026-05-20@13:00 : Site unavailable - pve upgrade (kwollect)","id":18297,"reporter":"Nathan Paulin","assigned_to":"Support Staff","updated":1779263120,"created":1779202436,"priority":"P?","severity":"high: should be fixed in next month","component":"@Louvain","resolution":"FIXED","status":"RESOLVED","start_date":1779256800,"end_date":1779274800,"tags":["maintenance","louvain"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18297","content":""},{"timezone":"CET","title":"#Exceptional #electrical #maintenance at #Nancy from 2026-05-18@08:00 to 2026-05-19@18:00 : Site unavailable","id":18144,"reporter":"Nicolas Perrin","assigned_to":"Nicolas Perrin","updated":1779194129,"created":1774621895,"priority":"P?","severity":"med: should be fixed in 6 
months","component":"@Nancy","resolution":"FIXED","status":"RESOLVED","start_date":1779084000,"end_date":1779206400,"tags":["exceptional","electrical","maintenance","nancy"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18144","content":"Travaux électriques liés à la nouvelle salle HPC au DCML prévus les 18 et 19 mai 2026\nPas d'horaire précis pour le moment.\nSeuls les noeuds de calculs hébergés au DCML seront indisponibles.\nLes serveurs d'infra et les clusters gros, grele, gratouille ne devraient pas être impactés."},{"timezone":"CEST","title":"Electrical #Incident at #Nantes since 2026-05-14 04:44 : ecotype and ecotaxe cluster unavailable","id":18287,"reporter":"Alexis Bitaillou","assigned_to":"Alexis Bitaillou","updated":1779092357,"created":1779090849,"priority":"P?","severity":"high: should be fixed in next month","component":"@Nantes","resolution":"FIXED","status":"RESOLVED","start_date":1778709600,"end_date":1779092357,"tags":["incident","nantes"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18287","content":""},{"timezone":"CEST","title":"#Maintenance at #Nantes from 2026-05-13@09:00 to 2026-05-13@12:00 : econome cluster unavailable","id":18281,"reporter":"Alexis Bitaillou","assigned_to":"Alexis Bitaillou","updated":1778663794,"created":1778589809,"priority":"P?","severity":"high: should be fixed in next month","component":"@Nantes","resolution":"FIXED","status":"RESOLVED","start_date":1778655600,"end_date":1778666400,"tags":["maintenance","nantes"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=18281","content":""},{"timezone":"CET","title":"#EXCEPTIONAL #INCIDENT at #Grenoble from 2025-11-26@10:30: Grenoble wattmetres shutdown for an undetermined time","id":17785,"reporter":"Pierre Jacquot","assigned_to":"Colin Regal Mezin","updated":1772018966,"created":1764149820,"priority":"P?","severity":"med: should be fixed in 6 
months","component":"@Grenoble","resolution":"","status":"ASSIGNED","start_date":1764149400,"end_date":0,"tags":["exceptional","incident","grenoble"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=17785","content":"Due to an electrical safety issue with the wattmeters of Grenoble, we are forced to disable the monitoring service and to remove the wattmetres.\n\nWe will notify users when the problem is solved.\n\nSorry for the inconvenience,\nwattmetre1 and wattmetre2 services have been disabled on digwatt.grenoble.\n\nThey will be re-enabled when the wattmetres will operationnal again."},{"timezone":"CET","title":"#Incident at #Lille since 2026-02-05 : watt-meter unavailable on chirop cluster","id":17990,"reporter":"Alexis Bitaillou","assigned_to":"Hugo Dominois","updated":1771338561,"created":1771247104,"priority":"P?","severity":"med: should be fixed in 6 months","component":"@Lille","resolution":"","status":"ASSIGNED","start_date":1770246000,"end_date":0,"tags":["incident","lille"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=17990","content":"\nwattmetrev3-1-read.service est en état failed car /dev/wattmetrev3-1 n'existe pas."},{"timezone":"CET","title":"#EXCEPTIONAL #INCIDENT at #strasbourg since 2025-09-26@13:00: wattmetrev3-1 values unreadable","id":17759,"reporter":"Pierre Jacquot","assigned_to":"Pierre Jacquot","updated":1764078426,"created":1763473464,"priority":"P?","severity":"high: should be fixed in next month","component":"@Strasbourg","resolution":"","status":"ASSIGNED","start_date":1758884400,"end_date":0,"tags":["exceptional","incident","strasbourg"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=17759","content":"Le wattmetrev3-1 remonte beaucoup de valeurs NaN.\n\nIl faudrait débrancher/rebrancher ce wattmètre et voir si cela corrige ce problème."},{"timezone":"CEST","title":"#Exceptional #incident on #Nancy since 2025-07-10@05:40: gros-wattmetre2 returns invalid metrics","id":17378,"reporter":"Pierre 
Jacquot","assigned_to":"Pierre Jacquot","updated":1764062724,"created":1752649866,"priority":"P?","severity":"low: should be fixed at some point","component":"@Nancy","resolution":"","status":"ASSIGNED","start_date":1752118800,"end_date":0,"tags":["exceptional","incident","nancy"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=17378","content":"Pendant la réu de perm d'hier, on a discuté du check-wattmetre-loss.service qui remonte une alerte \"mute port(s)\".\n\nNathan est déjà allé redémarrer le wattmètre en salle serveur, donc il n'y a plus grand choose à faire de ce coté là.\n\nEn utilisant une version modifiée à l'arrache de wattm-parser-core (https://gitlab.inria.fr/pijacquo/wattm-parser-core branche wattmetrev2), j'ai pu trouver que ce sont les ports 12 à 17 qui remontent des valeurs nulles.\n\nCes ports correspondent respectivement aux machines gros-59 à gros-64.\n\nJe pense qu'une première étape est d'aller en salle serveur et de débrancher/rebranches le câble d'alim de ces machines, pour voir si ça améliore les choses.\n\nPour info, le programme de débug se trouve sur wattmetre.nancy, dans wattm-parser-core.\nIl y a un petit fichier README à coté qui explique comment j'ai fait pour trouver les machines incriminées.\nj'ai débranché/rebranché le câble d'alim de ces machines\nJ'ai jeté un oeil aux logs, et ça n'a rien changé: les wattmetres renvoient toujours plein de 0.0W...\n\nJe commence à penser qu'il y a un vrai soucis matériel sur le wattmetre du cluster gros.\n\nEn jetant un oeil à nouveau je me suis aussi rendu compte que les ports 30 à 35 ont un soucis, donc ça concerne aussi les gros-[41-46].\nJ'ai demandé un dernier reboot à Nathan pour vérifier si l'erreur resterait sur les mêmes ports ou non.\n\nIl s'avère qu'après le reboot qu'à fait Nathan ce matin, plus aucun des ports du wattmètre ne fonctionne correctement...\n\ng5kadmin@wattmetre-gros.nancy.grid5000.fr(physical):~/wattm-parser-core$ sudo systemctl status 
check-wattmetre-loss.service \n× check-wattmetre-loss.service - Run check-wattmetre-loss\n     Loaded: loaded (/etc/systemd/system/check-wattmetre-loss.service; static)\n     Active: failed (Result: exit-code) since Thu 2025-08-21 15:52:49 CEST; 37min ago\nTriggeredBy: ● check-wattmetre-loss.timer\n    Process: 3775648 ExecStart=/usr/local/sbin/check-wattmetre-loss -s nancy -t 10 -d /data (code=exited, status=1/FAILURE)\n   Main PID: 3775648 (code=exited, status=1/FAILURE)\n        CPU: 484ms\n\nAug 21 15:52:48 wattmetre-gros systemd[1]: Starting check-wattmetre-loss.service - Run check-wattmetre-loss...\nAug 21 15:52:49 wattmetre-gros check-wattmetre-loss[3775648]: 2025-08-21T15,gros-wattmetre2,0,158450,100.00%\nAug 21 15:52:49 wattmetre-gros systemd[1]: check-wattmetre-loss.service: Main process exited, code=exited, status=1/FAILURE\nAug 21 15:52:49 wattmetre-gros systemd[1]: check-wattmetre-loss.service: Failed with result 'exit-code'.\nAug 21 15:52:49 wattmetre-gros systemd[1]: Failed to start check-wattmetre-loss.service - Run check-wattmetre-loss.\n\nSi on souhaite garder des wattmètres sur le cluster Gros, il va probablement falloir contacter Omegawatt/Adecwatt pour voir ce qu'on peut faire.\nJe voulais arrêter les services liés aux wattmètres pour ne plus que ça génère d'erreur dans icinga, sauf qu'en me loggant sur le serveur wattmetre-gros.nancy j'ai vu que maintenant seul les ports 12 à 17 du wattmètre2 posent problème. 
\nÇa concerne gros-59 à 64.\n\nJe pense qu'une solution intermédiaire serait de retirer ces 5 noeuds du monitoring des wattmètres en attendant de trouver une solution plus pérenne.\nJ'essaie de faire ça et je vois si ça fonctionne.\n(In reply to Pierre Jacquot from comment #5)\n> Je pense qu'une solution intermédiaire serait de retirer ces 5 noeuds du\n> monitoring des wattmètres en attendant de trouver une solution plus pérenne.\n> J'essaie de faire ça et je vois si ça fonctionne.\n\nEnfait ça n'est pas possible.\nLe service check-wattmetre-loss utilise la ref-api pour voir quels sont les ports sur lesquels faire des stats, pas le fichier /etc/kwollect/wattmetre-mapping.conf.\n\nSi on veut pouvoir faire ça, il faudrait modifier le ref-repo et le service check-wattmetre-loss.\n\nJe vais acknowledger l'erreur en attendant de voir ce qu'on peut faire.\nPlus de wattmètres; plus de problème.\n\nhttps://gitlab.inria.fr/grid5000/reference-repository/-/merge_requests/1586\n\nJe ferme le bug.\nJ'ai remis les wattmètres dans la description du ref-repo & regénéré les confs kwollect + DNS (alias wattmetre-gros2.nancy) vu qu'il faut finalement garder les wattmètres en fonctionnement malgré le fait qu'ils fonctionnent de manière aléatoire. 
\n\nJe passe le bug à low vu que le consensus actuel est de laisser le monitoring en l'état à Nancy."},{"timezone":"CEST","title":"#EXCEPTIONAL #INCIDENT at #Lyon since 2025-10-24@14:30: wattmetrev3-2 unavailable","id":17700,"reporter":"Pierre Jacquot","assigned_to":"Pierre Jacquot","updated":1761654301,"created":1761312311,"priority":"P?","severity":"med: should be fixed in 6 months","component":"@Lyon","resolution":"","status":"ASSIGNED","start_date":1761309000,"end_date":0,"tags":["exceptional","incident","lyon"],"url":"https://intranet.grid5000.fr/bugzilla/show_bug.cgi?id=17700","content":"In order to repair wattmetrev3-1 we had to cannibalize wattmetrev3-2.\n\nAs a result, neowise-[6-10] cannot be monitored until the situation with wattmetrev3-1 is resolved."}]