XCP-ng
    • Categories
    • Recent
    • Tags
    • Popular
    • Users
    • Groups
    • Register
    • Login
    1. Home
    2. peo
    3. Posts
    P
    Offline
    • Profile
    • Following 0
    • Followers 0
    • Topics 8
    • Posts 53
    • Groups 0

    Posts

    Recent Best Controversial
    • RE: Failed backup jobs since updating

      As it's a recently introduced bug, it will probably be fixed very soon. Got 'failure' on my weekly batch of backups during the night (4 machines using 'sequence'). This is real backups (but deltas) going to external storage.

      "Global status: failure", but still transfers to storage seems to go through:

      {
        "data": {
          "mode": "delta",
          "reportWhen": "failure"
        },
        "id": "1773627675367",
        "jobId": "b0f7dd0c-07c9-4cd8-938b-f519b0961b8d",
        "jobName": "ubu-01",
        "message": "backup",
        "scheduleId": "2e16be52-fc45-45d3-b73b-965d83235fa7",
        "start": 1773627675367,
        "status": "failure",
        "infos": [
          {
            "data": {
              "vms": [
                "31788a46-9fb1-3532-36b4-d9ad90c7d310"
              ]
            },
            "message": "vms"
          }
        ],
        "tasks": [
          {
            "data": {
              "type": "VM",
              "id": "31788a46-9fb1-3532-36b4-d9ad90c7d310",
              "name_label": "ubu-01"
            },
            "id": "1773627954874",
            "message": "backup VM",
            "start": 1773627954874,
            "status": "failure",
            "tasks": [
              {
                "id": "1773627954887",
                "message": "clean-vm",
                "start": 1773627954887,
                "status": "failure",
                "warnings": [
                  {
                    "data": {
                      "path": "/xo-vm-backups/31788a46-9fb1-3532-36b4-d9ad90c7d310/vdis/b0f7dd0c-07c9-4cd8-938b-f519b0961b8d/35e6144b-5e78-4ae7-816f-abf21e73aeda/data/6d9d6dca-0803-4cc9-8e79-e049203d702e.vhd"
                    },
                    "message": "no alias references VHD"
                  }
                ],
                "end": 1773627980060,
                "result": {
                  "$fault": "server",
                  "$metadata": {
                    "httpStatusCode": 500,
                    "requestId": "12284EA595C10B99",
                    "extendedRequestId": "MTIyODRFQTU5NUMxMEI5OTEyMjg0RUE1OTVDMTBCOTkxMjI4NEVBNTk1QzEwQjk5MTIyODRFQTU5NUMxMEI5OQ==",
                    "attempts": 3,
                    "totalRetryDelay": 200
                  },
                  "name": "InternalError",
                  "Code": "InternalError",
                  "message": "Internal Error",
                  "stack": "InternalError: Internal Error\n    at ProtocolLib.getErrorSchemaOrThrowBaseException (/opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/core/dist-cjs/submodules/protocols/index.js:69:67)\n    at AwsRestXmlProtocol.handleError (/opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/core/dist-cjs/submodules/protocols/index.js:1801:65)\n    at AwsRestXmlProtocol.deserializeResponse (/opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@smithy/core/dist-cjs/submodules/protocols/index.js:309:24)\n    at process.processTicksAndRejections (node:internal/process/task_queues:103:5)\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@smithy/core/dist-cjs/submodules/schema/index.js:26:24\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/middleware-sdk-s3/dist-cjs/index.js:386:20\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@smithy/middleware-retry/dist-cjs/index.js:254:46\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/middleware-sdk-s3/dist-cjs/index.js:63:28\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/middleware-sdk-s3/dist-cjs/index.js:90:20\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/middleware-logger/dist-cjs/index.js:5:26"
                }
              },
              {
                "id": "1773627983950",
                "message": "snapshot",
                "start": 1773627983950,
                "status": "success",
                "end": 1773627992689,
                "result": "08492698-4a00-4412-4a22-ad1da6114206"
              },
              {
                "data": {
                  "id": "c4e665e9-68c0-4824-b616-8e397350d44d",
                  "isFull": false,
                  "type": "remote"
                },
                "id": "1773627992689:0",
                "message": "export",
                "start": 1773627992689,
                "status": "failure",
                "tasks": [
                  {
                    "id": "1773627998066",
                    "message": "transfer",
                    "start": 1773627998066,
                    "status": "success",
                    "end": 1773629436434,
                    "result": {
                      "size": 31975276544
                    }
                  },
                  {
                    "id": "1773629446409",
                    "message": "clean-vm",
                    "start": 1773629446409,
                    "status": "failure",
                    "warnings": [
                      {
                        "data": {
                          "path": "/xo-vm-backups/31788a46-9fb1-3532-36b4-d9ad90c7d310/vdis/b0f7dd0c-07c9-4cd8-938b-f519b0961b8d/35e6144b-5e78-4ae7-816f-abf21e73aeda/data/6d9d6dca-0803-4cc9-8e79-e049203d702e.vhd"
                        },
                        "message": "no alias references VHD"
                      }
                    ],
                    "end": 1773629506113,
                    "result": {
                      "$fault": "server",
                      "$metadata": {
                        "httpStatusCode": 500,
                        "requestId": "12284EA595C154A2",
                        "extendedRequestId": "MTIyODRFQTU5NUMxNTRBMjEyMjg0RUE1OTVDMTU0QTIxMjI4NEVBNTk1QzE1NEEyMTIyODRFQTU5NUMxNTRBMg==",
                        "attempts": 3,
                        "totalRetryDelay": 144
                      },
                      "name": "InternalError",
                      "Code": "InternalError",
                      "message": "Internal Error",
                      "stack": "InternalError: Internal Error\n    at ProtocolLib.getErrorSchemaOrThrowBaseException (/opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/core/dist-cjs/submodules/protocols/index.js:69:67)\n    at AwsRestXmlProtocol.handleError (/opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/core/dist-cjs/submodules/protocols/index.js:1801:65)\n    at AwsRestXmlProtocol.deserializeResponse (/opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@smithy/core/dist-cjs/submodules/protocols/index.js:309:24)\n    at process.processTicksAndRejections (node:internal/process/task_queues:103:5)\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@smithy/core/dist-cjs/submodules/schema/index.js:26:24\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/middleware-sdk-s3/dist-cjs/index.js:386:20\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@smithy/middleware-retry/dist-cjs/index.js:254:46\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/middleware-sdk-s3/dist-cjs/index.js:63:28\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/middleware-sdk-s3/dist-cjs/index.js:90:20\n    at async /opt/xo/xo-builds/xen-orchestra-202603131938/node_modules/@aws-sdk/middleware-logger/dist-cjs/index.js:5:26"
                    }
                  }
                ],
                "end": 1773629506114
              }
            ],
            "end": 1773629506114
          }
        ],
        "end": 1773629506114
      }
      

      This did not happen with the previous version (but that is more than a month old, so I'm not going back yet)

      posted in Backup
      P
      peo
    • RE: Failed backup jobs since updating

      I thought I solved it.. but now the problem is back again:

      Yesterday, 14 March, I did the following:

      • deleted the snapshots on the running Deb12-XO machine, so it was clean
      • deleted the clones of the machine
      • ran the backup job manually - with success (transferred about 30G to the other host)
        (still on the "old" XO version)

      Checked the destination host - no clone (backup) of the VM found.

      • manually created two (full, not quick) clones of the VM so it won't be lost if anything goes sideways
      • updated XO to the latest (2aff8)
      • let it do the replication according to the schedule
      • 21:00 replication success

      Next morning (15 Mar) 09:00 replication also ran without any issues - at least according to the log.

      • no additional copy of the VM on the destination host (retention is set to 2, so I should have one 21:00 and room for the 09:00 too)

      Evening replication, 21:00, failed. Got that same error message as before:

      VM Backup report

      Global status : failure 🚨

      Job ID: 883e2ee8-00c8-43f8-9ecd-9f9aa7aa01d1
      Run ID: 1773604800005
      Mode: delta
      Start time: Sunday, March 15th 2026, 9:00:00 pm
      End time: Sunday, March 15th 2026, 9:00:13 pm
      Duration: a few seconds
      Successes: 0 / 1
      Transfer size: 126 MiB
      

      1 Failure
      Deb12-XO
      Debian 12 XO self-install

      Pool id: 4cc74549-71c3-31d5-f204-7106e90acd1e
      UUID: 30829107-2a1b-6b20-a08a-f2c1e612b2ee
      Start time: Sunday, March 15th 2026, 9:00:02 pm
      End time: Sunday, March 15th 2026, 9:00:13 pm
      Duration: a few seconds
      Error: _removeUnusedSnapshots don't handle vdi related to multiple VMs Deb12-XO - Deb12-XO - (20260315T080004Z) and [XO Backup Deb12-XO] Deb12-XO
      

      =
      I notice the name of the snapshot seems odd:
      [XO Backup Deb12-XO] Deb12-XO - Deb12-XO

      Maybe just a new naming convention, but I found the old one better (for my Admin VM):
      Admin Ubuntu 24 - Admin Ubuntu 24 - (20260201T125508Z)
      Admin Ubuntu 24 - Admin Ubuntu 24 - (20260208T125508Z)
      (I assume the double "Admin Ubuntu 24" is because the backup job name is the same as the machine name)

      posted in Backup
      P
      peo
    • Failed backup jobs since updating

      Yesterday, I installed the patches on my three hosts and rebooted them, directly followed by updating XO on my Debian machine to the latest (2cd5b at that time).
      Since then, both the evening-replication (21:00) of my Deb12-XO host failed, as well as the morning-replication (09:00). This one has never failed before and is (was) being replicated to another host on the same network.

      The mail-report says:
      Debian 12 XO self-install

      Pool id: 4cc74549-71c3-31d5-f204-7106e90acd1e
      UUID: 30829107-2a1b-6b20-a08a-f2c1e612b2ee
      Start time: Friday, March 13th 2026, 9:00:01 am
      End time: Friday, March 13th 2026, 9:00:28 am
      Duration: a few seconds
      Error: _removeUnusedSnapshots don't handle vdi related to multiple VMs Deb12-XO - Deb12-XO - (20260312T200004Z) and [XO Backup Deb12-XO] Deb12-XO
      

      As my XO-installation is outdated, I will update again and let it retry the replication without any other changes.

      Also, this is probably nothing else than an annoying incorrect error message from microsoft:
      74296a2e-ef15-4e92-afc1-82a851a57462-image.jpeg
      What I did: followed the link in "About" to the current commit, went back to main, and clicked the link to all commits to see what I've missed (4 commits after I updated XO)

      posted in Backup
      P
      peo
    • RE: Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source)

      @florent said in Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source):

      @Andrew @peo
      are you using purge snapshot data ?
      In both case, can you try disabling it and disabling CBT on the relevant VMs ?

      Yes, I was using CBT + purge snapshots, but that might not have been the reason for the sluggish transfer speeds with the later versions of XO.

      I discovered that the SSD on my destination host was going bad: a write of a /dev/zero-filled test file (1GB then 5GB) on the device started at the expected 450MB/s, but slowed down to less than 10MB/s. Found a bunch of unexpected errors (because the disk was more or less new) in the logs via dmesg.
      Doing the replication to another host made it fast even when transferred in full.
      CBT+delete = still "Backup fell back to a full" (this was the third since my XO update)
      CBT w/o delete = first backup=full transfer (4 min) and garbage collection (a few more minutes), second=delta transfer (a few seconds backup time)

      posted in Backup
      P
      peo
    • RE: Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source)

      I'm currently testing the "fixed" version. At the time I ran the replications the first time, I expected and accepted it doing new "first" full backups/transfers, but even this small, single-disk VM (the one for my XO) is still doing full transfers:
      fe04762a-02ed-4b4e-9e14-fcb7be8330d9-image.png

      This simple operation previously taking 2-5 minutes now takes 44 minutes:
      39bc2c0a-93dc-4627-8901-e24fb5ddf729-image.png

      Compared to the previous "normal" backup time:
      19407049-8301-4a15-8afd-d6e3f78f36c6-image.png

      The same goes for "Admin Ubuntu 24", single but slightly larger disk (50GB) which yesterday took 4 hours (initial "new" "first" backup, so I don't know if it still takes that time)

      posted in Backup
      P
      peo
    • RE: Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source)

      Too bad it becomes that unexplainably slow when it has to fall back to a full backup:
      f971760e-3157-4352-a2e8-c0060aada73a-image.png

      The first (full) backup was 10x faster:
      19a77237-047f-4813-b72b-9c9b3459e888-image.png

      posted in Backup
      P
      peo
    • RE: Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source)

      @olivierlambert said in Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source):

      No, no confirmation so far, that's why we need more to investigate πŸ™‚

      Great (or maybe not) news.. At least on my system this is very easy to replicate.. On a tiny test machine that has only had a normal backup before, the first time doing a replication copy (to H2 SSD) it sends it as full, as expected. Just did some simple operations on the machine (like apt update, no upgrade), then retried:
      96206924-73a5-484d-b089-ad9c673b4670-image.png

      posted in Backup
      P
      peo
    • RE: Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source)

      @olivierlambert Has the problem been confirmed ? I can try to pinpoint where the always-full replications were introduced, but it's more time efficient to do it on a machine that is fast to replicate, not the one that now takes 2.5 hours (a few minutes for the diff before it borked starting at the backup made 6 Sept)

      posted in Backup
      P
      peo
    • RE: Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source)

      @olivierlambert said in Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source):

      git bissect

      That might be quite a bunch of commits in between the working one and when I discovered the problems (I actually let it do its thing a couple of days, quickly filling up the backup drive)
      I keep only the current and the previous build of XO, so I only know it's somewhere between Aug 16 (from the 'lastlog' to find out when I was logged in) and late Sep 5.

      posted in Backup
      P
      peo
    • RE: Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source)

      Anyone else experiencing the same problem, or is it just a problem of my imagination?

      Recap:

      Since 2025-09-06, all Continuous Replication jobs are creating full backups instead of incrementals.

      Environment: XO from source, updated to commit c2144 (2 commits behind at the time of test).

      Replication target: a standalone host that is part of the pool, on the same local network, with local SSD storage.

      Logs and status screenshots were attached in my first post.

      What I’ve done so far:
      2025-09-11

      • Updated Xen Orchestra to the latest commits (as of 2025-09-11).
      • Restarted the XO VM and the source VM after update.

      2025-09-12:

      • Applied pool patches as they became available.
      • Installed host updates and rebooted hosts.
      • Restarted the source and target hosts (since all hosts were restarted).
      • Verified behavior again today β†’ still a full backup for the β€œAdmin Ubuntu 24” VM.

      Observation:
      The issue began exactly after the 2025-09-05 updates (which included a set of host patches).
      Since then, every daily replication runs as a full.

      posted in Backup
      P
      peo
    • Continuous Replication jobs creates full backups every time since 2025-09-06 (xo from source)

      Discarded my start of this post yesterday because I was too far behind with the updates. I yesterday updated to Xen Orchestra, commit c2144 (currently 2 commits behind), and the problem remains.
      I have, after the update of XO restarted that machine, also updated and restarted the other (first below) machine.

      Replication is transferred to a host with its own local storage on SSD.

      d92aac46-b207-4cf5-95ce-47936e05cc5a-image.png

      {
        "data": {
          "mode": "delta",
          "reportWhen": "failure"
        },
        "id": "1757507401899",
        "jobId": "0bb53ced-4d52-40a9-8b14-7cd1fa2b30fe",
        "jobName": "Admin Ubuntu 24",
        "message": "backup",
        "scheduleId": "69a05a67-c43b-4d23-b1e8-ada77c70ccc4",
        "start": 1757507401899,
        "status": "success",
        "infos": [
          {
            "data": {
              "vms": [
                "1728e876-5644-2169-6c62-c764bd8b6bdf"
              ]
            },
            "message": "vms"
          }
        ],
        "tasks": [
          {
            "data": {
              "type": "VM",
              "id": "1728e876-5644-2169-6c62-c764bd8b6bdf",
              "name_label": "Admin Ubuntu 24"
            },
            "id": "1757507403766",
            "message": "backup VM",
            "start": 1757507403766,
            "status": "success",
            "tasks": [
              {
                "id": "1757507404364",
                "message": "snapshot",
                "start": 1757507404364,
                "status": "success",
                "end": 1757507406083,
                "result": "79b28f7e-ded1-ed95-4b3b-005f64e69796"
              },
              {
                "data": {
                  "id": "9d2121f8-6839-39d4-4e90-850a5b6f1bbb",
                  "isFull": false,
                  "name_label": "Local h2 SSD new",
                  "type": "SR"
                },
                "id": "1757507406083:0",
                "message": "export",
                "start": 1757507406083,
                "status": "success",
                "tasks": [
                  {
                    "id": "1757507408827",
                    "message": "transfer",
                    "start": 1757507408827,
                    "status": "success",
                    "end": 1757512215875,
                    "result": {
                      "size": 52571406336
                    }
                  }
                ],
                "end": 1757512216056
              }
            ],
            "warnings": [
              {
                "message": "Backup fell back to a full"
              }
            ],
            "infos": [
              {
                "message": "will delete snapshot data"
              },
              {
                "data": {
                  "vdiRef": "OpaqueRef:47180590-40ab-f301-57bf-85c2fbe9b51d"
                },
                "message": "Snapshot data has been deleted"
              }
            ],
            "end": 1757512217031
          }
        ],
        "end": 1757512217032
      }
      

      db7401ef-485f-42bc-808f-5c70ed55a931-image.png

      {
        "data": {
          "mode": "delta",
          "reportWhen": "failure"
        },
        "id": "1757574000010",
        "jobId": "883e2ee8-00c8-43f8-9ecd-9f9aa7aa01d1",
        "jobName": "Deb12-XO",
        "message": "backup",
        "scheduleId": "19aab592-cd48-431e-a82c-525eba60fcc7",
        "start": 1757574000010,
        "status": "success",
        "infos": [
          {
            "data": {
              "vms": [
                "30829107-2a1b-6b20-a08a-f2c1e612b2ee"
              ]
            },
            "message": "vms"
          }
        ],
        "tasks": [
          {
            "data": {
              "type": "VM",
              "id": "30829107-2a1b-6b20-a08a-f2c1e612b2ee",
              "name_label": "Deb12-XO"
            },
            "id": "1757574001999",
            "message": "backup VM",
            "start": 1757574001999,
            "status": "success",
            "tasks": [
              {
                "id": "1757574002518",
                "message": "snapshot",
                "start": 1757574002518,
                "status": "success",
                "end": 1757574004197,
                "result": "73a6e7d5-fcd6-e65f-ddd8-9dd700596505"
              },
              {
                "data": {
                  "id": "9d2121f8-6839-39d4-4e90-850a5b6f1bbb",
                  "isFull": false,
                  "name_label": "Local h2 SSD new",
                  "type": "SR"
                },
                "id": "1757574004197:0",
                "message": "export",
                "start": 1757574004197,
                "status": "success",
                "tasks": [
                  {
                    "id": "1757574006959",
                    "message": "transfer",
                    "start": 1757574006959,
                    "status": "success",
                    "end": 1757576397226,
                    "result": {
                      "size": 28974252032
                    }
                  }
                ],
                "end": 1757576397820
              }
            ],
            "warnings": [
              {
                "message": "Backup fell back to a full"
              }
            ],
            "infos": [
              {
                "message": "will delete snapshot data"
              },
              {
                "data": {
                  "vdiRef": "OpaqueRef:b0ab2f7c-6606-bc15-cdcc-f7f002f3181a"
                },
                "message": "Snapshot data has been deleted"
              }
            ],
            "end": 1757576398882
          }
        ],
        "end": 1757576398883
      }
      
      posted in Backup
      P
      peo
    • RE: Misleading messages during restore from backup

      @olivierlambert Thanks, that explains the "on .." part of the message, and as you mention, it's not a "problem", but just confusing in the context of that full message.
      The optimal would of course be that the task was launched on the host that is "closest" to the SR, in this case a local SR on xcp-ng-2.

      This one (unrelated list of previous tasks) makes the message more clear what the "on.." part is about:

      xo-tasks.png

      posted in Backup
      P
      peo
    • RE: Misleading messages during restore from backup

      @olivierlambert but the "importing content" message is incorrect:
      "on SR Local h2 SSD new (on xcp-ng-1)"

      That's the problem (not to where it's restored to, and has to be run from because it's a local SR)..

      In the message, what does "xcp-ng-1" have to do with this operation ? It's not involved at all..

      xo-local-SRs.png

      posted in Backup
      P
      peo
    • RE: Misleading messages during restore from backup

      @olivierlambert I can make it even more confusing πŸ™‚

      But first, you're correct about me misnaming the BR as "Remote SR".

      Just found out that the machine I backed up is NOT running on xcp-ng-1, but on xcp-ng-3 (I assumed it was xcp-ng-1 because of the message):

      xo-misleading-restore-source-machine.png

      posted in Backup
      P
      peo
    • RE: Misleading messages during restore from backup

      @olivierlambert The source machine for the backup is on local SR on 'xcp-ng-1'. I'm restoring it to a local SR on 'xcp-ng-2' (the backup is read from a remote SR)

      posted in Backup
      P
      peo
    • Misleading messages during restore from backup

      Hi,

      I have seen this before, but not reported it. During a restore of a backup of a VM, to another host, the message during restore is a bit misleading.
      I'm restoring from the remote named "xcp-ng-appservers" to (as indicated by the ongoing status) "Local h2 SSD new". The misleading part is "(on xcp-ng-1)", as I restore it to a SR on "xcp-ng-2" (and will run it from there).

      Maybe a simple fix would be to just make the meaning of the "on" a bit clearer, or just remove it (the backed up machine runs on 'xcp-ng-1', and I will have my duplicate from the latest backup started on 'xcp-ng-2')

      xo-misleading-restore-message.png

      posted in Backup
      P
      peo
    • RE: Backups started to fail again (overall status: failure, but both snapshot and transfer returns success)

      @olivierlambert no, and all VMs were working at the time before I rebooted the two hosts (not the third one, since that didn't have problems accessing /run/sr-mount/)

      I understand that 'df' will lock up if an NFS or SMB share does not respond, but listing /run/sr-mount/ with ls (without trying to access a subfolder) should have no reason to lock up (unless /run/sr-mount is not an ordinary folder, which it seems to be)

      posted in Backup
      P
      peo
    • RE: Backups started to fail again (overall status: failure, but both snapshot and transfer returns success)

      @olivierlambert I found a "solution" to the problem, by just rebooting the two involved hosts, but this might still be an issue somewhere (XO or even xcp-ng):

      At the time I started up the hosts after the power failure, the dependencies had already been started a long time before (mainly my internet connectivity and the NAS which holds one of the SRs). All three hosts have their local 2TB SSD as well for different purposes (faster disk access, temporary storage and replication from other hosts).

      I actually forgot to connect the network cable (unplugged because I reorganized the cables to the switch at the same time) to the third host (not involved in these recent problems) and found out that it seemed like it didn't start up properly (or at least, I did not get any video output from it when I was going to check its status after connecting the network cable), so I gave that one a hard reboot and got it up and running.

      Machines with their disks on the local SSDs of the two other hosts have worked fine since I powered them up, so what follows (and the replication issue) was not expected at all:

      Lock up on 'df' and 'ls /run/sr-mount/':

      [11:21 xcp-ng-1 ~]# df -h
      ^C
      [11:21 xcp-ng-1 ~]# ^C
      
      [11:21 xcp-ng-1 ~]# ls /run/sr-mount/
      ^C
      [11:22 xcp-ng-1 ~]# ls /run/
      

      ('ls /run/' worked fine)

      According to XO the disks were accessible and their content showed up as usual.

      posted in Backup
      P
      peo
    • RE: Backups started to fail again (overall status: failure, but both snapshot and transfer returns success)

      Since yesterday, even the replication jobs started to fail (I'm again 12 commits behind the current version, but other scheduled jobs continued to fail when I was up to date with XO)

      The replication is set to run from one host and store on the SSD on another. I had a power failure yesterday, but both hosts needed for this job (xcp-ng-1 and xcp-ng-2) was back up and running at the time the job was started.

      {
        "data": {
          "mode": "delta",
          "reportWhen": "failure"
        },
        "id": "1753705802804",
        "jobId": "0bb53ced-4d52-40a9-8b14-7cd1fa2b30fe",
        "jobName": "Admin Ubuntu 24",
        "message": "backup",
        "scheduleId": "69a05a67-c43b-4d23-b1e8-ada77c70ccc4",
        "start": 1753705802804,
        "status": "failure",
        "infos": [
          {
            "data": {
              "vms": [
                "1728e876-5644-2169-6c62-c764bd8b6bdf"
              ]
            },
            "message": "vms"
          }
        ],
        "tasks": [
          {
            "data": {
              "type": "VM",
              "id": "1728e876-5644-2169-6c62-c764bd8b6bdf",
              "name_label": "Admin Ubuntu 24"
            },
            "id": "1753705804503",
            "message": "backup VM",
            "start": 1753705804503,
            "status": "failure",
            "tasks": [
              {
                "id": "1753705804984",
                "message": "snapshot",
                "start": 1753705804984,
                "status": "success",
                "end": 1753712867640,
                "result": "4afbdcd9-818f-9e3d-555a-ad0943081c3f"
              },
              {
                "data": {
                  "id": "46f9b5ee-c937-ff71-29b1-520ba0546675",
                  "isFull": false,
                  "name_label": "Local h2 SSD",
                  "type": "SR"
                },
                "id": "1753712867640:0",
                "message": "export",
                "start": 1753712867640,
                "status": "interrupted"
              }
            ],
            "infos": [
              {
                "message": "will delete snapshot data"
              },
              {
                "data": {
                  "vdiRef": "OpaqueRef:c2504c79-d422-3f0a-d292-169d431e5aee"
                },
                "message": "Snapshot data has been deleted"
              }
            ],
            "end": 1753717484618,
            "result": {
              "name": "BodyTimeoutError",
              "code": "UND_ERR_BODY_TIMEOUT",
              "message": "Body Timeout Error",
              "stack": "BodyTimeoutError: Body Timeout Error\n    at FastTimer.onParserTimeout [as _onTimeout] (/opt/xo/xo-builds/xen-orchestra-202507262229/node_modules/undici/lib/dispatcher/client-h1.js:646:28)\n    at Timeout.onTick [as _onTimeout] (/opt/xo/xo-builds/xen-orchestra-202507262229/node_modules/undici/lib/util/timers.js:162:13)\n    at listOnTimeout (node:internal/timers:588:17)\n    at process.processTimers (node:internal/timers:523:7)"
            }
          }
        ],
        "end": 1753717484619
      }
      

      Also, the replication job for my Debian XO machine fails with the same 'timeout' problem.

      posted in Backup
      P
      peo
    • RE: Backups started to fail again (overall status: failure, but both snapshot and transfer returns success)

      Since I updated 'everything' involved yesterday, the problems remain (this night's backups failed with a similar problem). As I'm again 6 commits behind the current version, I cannot create a useful bug report, so I'll just update and wait for the next scheduled backups to run (nothing the night towards Thursday, the next sequence will run the night towards Friday)

      posted in Backup
      P
      peo