patchpanel: add UMA metrics to ArcService

BUG=b:202679717
BUG=b:204752444
TEST=Flashed trogdor, started user session, visited
chrome://histograms#Network.Patchpanel.ArcService and observed events
being recorded.

Cq-Depend: chromium:3255193
Change-Id: I206303ed7dd5ea281365b85dd1707a8ae62bd9e0
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform2/+/3255472
Tested-by: Hugo Benichi <hugobenichi@google.com>
Reviewed-by: Jason Jeremy Iman <jasongustaman@chromium.org>
Reviewed-by: Garrick Evans <garrick@chromium.org>
Commit-Queue: Hugo Benichi <hugobenichi@google.com>
diff --git a/patchpanel/arc_service.cc b/patchpanel/arc_service.cc
index 7433966..ff3b9ea 100644
--- a/patchpanel/arc_service.cc
+++ b/patchpanel/arc_service.cc
@@ -23,6 +23,7 @@
 #include "patchpanel/guest_type.h"
 #include "patchpanel/mac_address_generator.h"
 #include "patchpanel/manager.h"
+#include "patchpanel/metrics.h"
 #include "patchpanel/minijailed_process_runner.h"
 #include "patchpanel/net_util.h"
 #include "patchpanel/scoped_ns.h"
@@ -35,6 +36,10 @@
 constexpr char kArcNetnsName[] = "arc_netns";
 constexpr char kArcIfname[] = "arc0";
 
+void RecordEvent(MetricsLibraryInterface* metrics, ArcServiceUmaEvent event) {
+  metrics->SendEnumToUMA(kArcServiceUmaEventMetrics, event);
+}
+
 bool IsAdbAllowed(ShillClient::Device::Type type) {
   static const std::set<ShillClient::Device::Type> adb_allowed_types{
       ShillClient::Device::Type::kEthernet,
@@ -62,22 +67,28 @@
 
 // Makes Android root the owner of /sys/class/ + |path|. |pid| is the ARC
 // container pid.
-void SetSysfsOwnerToAndroidRoot(uint32_t pid, const std::string& path) {
+bool SetSysfsOwnerToAndroidRoot(uint32_t pid, const std::string& path) {
   auto ns = ScopedNS::EnterMountNS(pid);
   if (!ns) {
     LOG(ERROR) << "Cannot enter mnt namespace for pid " << pid;
-    return;
+    return false;
   }
 
   const std::string sysfs_path = "/sys/class/" + path;
-  if (chown(sysfs_path.c_str(), kAndroidRootUid, kAndroidRootUid) == -1)
+  if (chown(sysfs_path.c_str(), kAndroidRootUid, kAndroidRootUid) == -1) {
     PLOG(ERROR) << "Failed to change ownership of " + sysfs_path;
+    return false;
+  }
+
+  return true;
 }
 
-void OneTimeContainerSetup(Datapath& datapath, uint32_t pid) {
+bool OneTimeContainerSetup(Datapath& datapath, uint32_t pid) {
   static bool done = false;
   if (done)
-    return;
+    return true;
+
+  bool success = true;
 
   // Load networking modules needed by Android that are not compiled in the
   // kernel. Android does not allow auto-loading of kernel modules.
@@ -93,6 +104,7 @@
       })) {
     LOG(ERROR) << "One or more required kernel modules failed to load."
                << " Some Android functionality may be broken.";
+    success = false;
   }
   // The xfrm modules needed for Android's ipsec APIs on kernels < 5.4.
   int major, minor;
@@ -106,9 +118,11 @@
       })) {
     LOG(ERROR) << "One or more required kernel modules failed to load."
                << " Some Android functionality may be broken.";
+    success = false;
   }
 
-  // Optional modules.
+  // Additional modules optional for CTS compliance but required for some
+  // Android features.
   if (!datapath.ModprobeAll({
           // This module is not available in kernels < 3.18
           "nf_reject_ipv6",
@@ -122,12 +136,16 @@
           "tun",
       })) {
     LOG(WARNING) << "One or more optional kernel modules failed to load.";
+    success = false;
   }
 
   // This is only needed for CTS (b/27932574).
-  SetSysfsOwnerToAndroidRoot(pid, "xt_idletimer");
+  if (!SetSysfsOwnerToAndroidRoot(pid, "xt_idletimer")) {
+    success = false;
+  }
 
   done = true;
+  return success;
 }
 
 // Creates the ARC management Device used for VPN forwarding, ADB-over-TCP.
@@ -165,10 +183,12 @@
 ArcService::ArcService(Datapath* datapath,
                        AddressManager* addr_mgr,
                        GuestMessage::GuestType guest,
+                       MetricsLibraryInterface* metrics,
                        Device::ChangeEventHandler device_changed_handler)
     : datapath_(datapath),
       addr_mgr_(addr_mgr),
       guest_(guest),
+      metrics_(metrics),
       device_changed_handler_(device_changed_handler),
       id_(kInvalidId) {
   arc_device_ = MakeArc0Device(addr_mgr, guest_);
@@ -263,7 +283,10 @@
 }
 
 bool ArcService::Start(uint32_t id) {
+  RecordEvent(metrics_, ArcServiceUmaEvent::kStart);
+
   if (IsStarted()) {
+    RecordEvent(metrics_, ArcServiceUmaEvent::kStartWithoutStop);
     LOG(WARNING) << "Already running - did something crash?"
                  << " Stopping and restarting...";
     Stop(id_);
@@ -286,7 +309,10 @@
     }
     arc_device_ifname = arc_device_->config().tap_ifname();
   } else {
-    OneTimeContainerSetup(*datapath_, id);
+    if (!OneTimeContainerSetup(*datapath_, id)) {
+      RecordEvent(metrics_, ArcServiceUmaEvent::kOneTimeContainerSetupError);
+      LOG(ERROR) << "One time container setup failed";
+    }
     if (!datapath_->NetnsAttachName(kArcNetnsName, id)) {
       LOG(ERROR) << "Failed to attach name " << kArcNetnsName << " to pid "
                  << id;
@@ -303,8 +329,10 @@
       return false;
     }
     // Allow netd to write to /sys/class/net/arc0/mtu (b/175571457).
-    SetSysfsOwnerToAndroidRoot(id,
-                               "net/" + arc_device_->guest_ifname() + "/mtu");
+    if (!SetSysfsOwnerToAndroidRoot(
+            id, "net/" + arc_device_->guest_ifname() + "/mtu")) {
+      RecordEvent(metrics_, ArcServiceUmaEvent::kSetVethMtuError);
+    }
   }
   id_ = id;
 
@@ -332,11 +360,14 @@
     return false;
   }
 
+  RecordEvent(metrics_, ArcServiceUmaEvent::kStartSuccess);
   return true;
 }
 
 void ArcService::Stop(uint32_t id) {
+  RecordEvent(metrics_, ArcServiceUmaEvent::kStop);
   if (!IsStarted()) {
+    RecordEvent(metrics_, ArcServiceUmaEvent::kStopBeforeStart);
     LOG(ERROR) << "ArcService was not running";
     return;
   }
@@ -364,14 +395,16 @@
   // Stop the bridge for the management interface arc0.
   if (guest_ == GuestMessage::ARC) {
     datapath_->RemoveInterface(ArcVethHostName(arc_device_->phys_ifname()));
-    if (!datapath_->NetnsDeleteName(kArcNetnsName))
+    if (!datapath_->NetnsDeleteName(kArcNetnsName)) {
       LOG(WARNING) << "Failed to delete netns name " << kArcNetnsName;
+    }
   }
 
   // Destroy allocated TAP devices if any, including the ARC management Device.
   for (auto* config : all_configs_) {
     if (config->tap_ifname().empty())
       continue;
+
     datapath_->RemoveInterface(config->tap_ifname());
     config->set_tap_ifname("");
   }
@@ -379,6 +412,7 @@
   datapath_->RemoveBridge(kArcBridge);
   LOG(INFO) << "Stopped ARC management Device " << *arc_device_.get();
   id_ = kInvalidId;
+  RecordEvent(metrics_, ArcServiceUmaEvent::kStopSuccess);
 }
 
 void ArcService::AddDevice(const std::string& ifname,
@@ -390,6 +424,8 @@
   if (ifname.empty())
     return;
 
+  RecordEvent(metrics_, ArcServiceUmaEvent::kAddDevice);
+
   if (devices_.find(ifname) != devices_.end()) {
     LOG(DFATAL) << "Attemping to add already tracked shill Device: " << ifname;
     return;
@@ -454,6 +490,7 @@
 
   device_changed_handler_.Run(*device, Device::ChangeEvent::ADDED, guest_);
   devices_.emplace(ifname, std::move(device));
+  RecordEvent(metrics_, ArcServiceUmaEvent::kAddDeviceSuccess);
 }
 
 void ArcService::RemoveDevice(const std::string& ifname) {
diff --git a/patchpanel/arc_service.h b/patchpanel/arc_service.h
index e178887..4ce503f 100644
--- a/patchpanel/arc_service.h
+++ b/patchpanel/arc_service.h
@@ -14,6 +14,7 @@
 
 #include <base/memory/weak_ptr.h>
 #include <gtest/gtest_prod.h>  // for FRIEND_TEST
+#include <metrics/metrics_library.h>
 
 #include "patchpanel/address_manager.h"
 #include "patchpanel/datapath.h"
@@ -27,10 +28,11 @@
 
 class ArcService {
  public:
-  // All pointers are required and cannot be null, and are owned by the caller.
+  // All pointers are required, cannot be null, and are owned by the caller.
   ArcService(Datapath* datapath,
              AddressManager* addr_mgr,
              GuestMessage::GuestType guest,
+             MetricsLibraryInterface* metrics,
              Device::ChangeEventHandler device_changed_handler);
   ArcService(const ArcService&) = delete;
   ArcService& operator=(const ArcService&) = delete;
@@ -72,11 +74,20 @@
   void ReleaseConfig(ShillClient::Device::Type type,
                      std::unique_ptr<Device::Config> config);
 
-  Datapath* datapath_;
-  AddressManager* addr_mgr_;
-  GuestMessage::GuestType guest_;
-  Device::ChangeEventHandler device_changed_handler_;
+  FRIEND_TEST(ArcServiceTest, NotStarted_AddDevice);
+  FRIEND_TEST(ArcServiceTest, NotStarted_AddRemoveDevice);
 
+  // Routing and iptables controller service, owned by Manager.
+  Datapath* datapath_;
+  // IPv4 prefix and address manager, owned by Manager.
+  AddressManager* addr_mgr_;
+  // Type of ARC environment, valid values are ARC_VM or ARC.
+  GuestMessage::GuestType guest_;
+  // UMA metrics client, owned by Manager.
+  MetricsLibraryInterface* metrics_;
+  // Manager callback used for notifying about virtual device creation and
+  // removal events.
+  Device::ChangeEventHandler device_changed_handler_;
   // A set of preallocated ARC interface configurations keyed by technology type
   // and used for setting up ARCVM TAP devices at VM booting time.
   std::map<ShillClient::Device::Type,
@@ -96,9 +107,6 @@
   // All shill Devices currently managed by shill, keyed by host interface name.
   std::map<std::string, ShillClient::Device::Type> shill_devices_;
 
-  FRIEND_TEST(ArcServiceTest, NotStarted_AddDevice);
-  FRIEND_TEST(ArcServiceTest, NotStarted_AddRemoveDevice);
-
   base::WeakPtrFactory<ArcService> weak_factory_{this};
 };
 
diff --git a/patchpanel/arc_service_test.cc b/patchpanel/arc_service_test.cc
index 790342c0..65e736a 100644
--- a/patchpanel/arc_service_test.cc
+++ b/patchpanel/arc_service_test.cc
@@ -17,6 +17,7 @@
 #include <base/callback_helpers.h>
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include <metrics/metrics_library_mock.h>
 
 #include "patchpanel/address_manager.h"
 #include "patchpanel/guest_type.h"
@@ -59,12 +60,13 @@
   void SetUp() override {
     datapath_ = std::make_unique<MockDatapath>();
     addr_mgr_ = std::make_unique<AddressManager>();
+    metrics_ = std::make_unique<MetricsLibraryMock>();
     guest_devices_.clear();
   }
 
   std::unique_ptr<ArcService> NewService(GuestMessage::GuestType guest) {
     return std::make_unique<ArcService>(
-        datapath_.get(), addr_mgr_.get(), guest,
+        datapath_.get(), addr_mgr_.get(), guest, metrics_.get(),
         base::BindRepeating(&ArcServiceTest::DeviceHandler,
                             base::Unretained(this)));
   }
@@ -77,6 +79,7 @@
 
   std::unique_ptr<AddressManager> addr_mgr_;
   std::unique_ptr<MockDatapath> datapath_;
+  std::unique_ptr<MetricsLibraryMock> metrics_;
   std::map<std::string, Device::ChangeEvent> guest_devices_;
 };
 
diff --git a/patchpanel/manager.cc b/patchpanel/manager.cc
index e1fdc06..4571ac2 100644
--- a/patchpanel/manager.cc
+++ b/patchpanel/manager.cc
@@ -27,6 +27,7 @@
 #include <base/threading/thread_task_runner_handle.h>
 #include <brillo/key_value_store.h>
 #include <brillo/minijail/minijail.h>
+#include <metrics/metrics_library.h>
 
 #include "patchpanel/guest_type.h"
 #include "patchpanel/ipc.pb.h"
@@ -273,7 +274,7 @@
   GuestMessage::GuestType arc_guest =
       USE_ARCVM ? GuestMessage::ARC_VM : GuestMessage::ARC;
   arc_svc_ = std::make_unique<ArcService>(
-      datapath_.get(), &addr_mgr_, arc_guest,
+      datapath_.get(), &addr_mgr_, arc_guest, metrics_.get(),
       base::BindRepeating(&Manager::OnGuestDeviceChanged,
                           weak_factory_.GetWeakPtr()));
   cros_svc_ = std::make_unique<CrostiniService>(
diff --git a/patchpanel/metrics.h b/patchpanel/metrics.h
index 3cf6e64..950b1b2 100644
--- a/patchpanel/metrics.h
+++ b/patchpanel/metrics.h
@@ -9,6 +9,8 @@
 
 // UMA metrics name for patchpanel Manager Dbus API calls.
 constexpr char kDbusUmaEventMetrics[] = "Network.Patchpanel.Dbus";
+// UMA metrics name for ArcService events.
+constexpr char kArcServiceUmaEventMetrics[] = "Network.Patchpanel.ArcService";
 
 // UMA metrics events for |kDbusUmaEventMetrics|;
 enum class DbusUmaEvent {
@@ -47,6 +49,23 @@
   kMaxValue,
 };
 
+// UMA metrics events for |kArcServiceUmaEventMetrics|;
+enum class ArcServiceUmaEvent {
+  kUnknown = 0,
+  kStart = 1,
+  kStartSuccess = 2,
+  kStartWithoutStop = 3,
+  kStop = 4,
+  kStopSuccess = 5,
+  kStopBeforeStart = 6,
+  kAddDevice = 7,
+  kAddDeviceSuccess = 8,
+  kSetVethMtuError = 10,
+  kOneTimeContainerSetupError = 11,
+
+  kMaxValue,
+};
+
 }  // namespace patchpanel
 
 #endif  // PATCHPANEL_METRICS_H_