[v2] runqueue: add cpu/io pressure regulation

Message ID 20220704204409.3692354-1-aryaman.gupta@windriver.com
State Accepted, archived
Commit 502e05cbe67fb7a0e804dcc2cc0764a2e05c014f
Headers show
Series [v2] runqueue: add cpu/io pressure regulation | expand

Commit Message

Aryaman Gupta July 4, 2022, 8:44 p.m. UTC
Stop the scheduler from starting new tasks if the current cpu or io
pressure is above a certain threshold, specified through the
"BB_PRESSURE_MAX_SOME_{CPU|IO}" variables in conf/local.conf.

If the thresholds aren't specified, the default values are 100 for both
CPU and IO, which will have no impact on build times.
A value below the arbitrary lower limit of 1.0 results in a fatal error,
to avoid extremely long builds. If a percentage limit is higher than 100,
the default value is used and a warning is issued to inform the user that
the specified limit is out of bounds.

Signed-off-by: Aryaman Gupta <aryaman.gupta@windriver.com>
Signed-off-by: Randy Macleod <randy.macleod@windriver.com>
---

* Changes in V2:
- Replace subprocess() calls with open()
- Rename BB variables to BB_PRESSURE_MAX_SOME_{CPU|IO}
- Skip the checking of pressure when no value is provided.

 bitbake/lib/bb/runqueue.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

Comments

Richard Purdie July 4, 2022, 9:41 p.m. UTC | #1
On Mon, 2022-07-04 at 16:44 -0400, Aryaman Gupta wrote:
> Stop the scheduler from starting new tasks if the current cpu or io
> pressure is above a certain threshold, specified through the
> "BB_PRESSURE_MAX_SOME_{CPU|IO}" variables in conf/local.conf.
> 
> If the thresholds aren't specified, the default values are 100 for both
> CPU and IO, which will have no impact on build times.
> Arbitary lower limit of 1.0 results in a fatal error to avoid extremely
> long builds. If the percentage limits are higher than 100, then the
> default values are used and warnings are issued to inform users that the
> specified limit is out of bounds.
> 
> Signed-off-by: Aryaman Gupta <aryaman.gupta@windriver.com>
> Signed-off-by: Randy Macleod <randy.macleod@windriver.com>
> ---
> 
> * Changes in V2:
> - Replace subprocess() calls with open()
> - Rename BB variables to BB_PRESSURE_MAX_SOME_{CPU|IO}
> - Skip the checking of pressure when no value is provided.

This looks much better, thanks!

Some small details below.

> 
>  bitbake/lib/bb/runqueue.py | 38 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)
> 
> diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
> index 1e47fe70ef..0500211c8d 100644
> --- a/bitbake/lib/bb/runqueue.py
> +++ b/bitbake/lib/bb/runqueue.py
> @@ -159,6 +159,26 @@ class RunQueueScheduler(object):
>                  self.buildable.append(tid)
>  
>          self.rev_prio_map = None
> +        # Some hosts like openSUSE have readable /proc/pressure files
> +        # but throw errors when these files are opened.
> +        try:
> +            with open("/proc/pressure/cpu") as cpu_pressure_fds, open("/proc/pressure/io") as io_pressure_fds:
> +                cpu_pressure_fds.read()
> +                io_pressure_fds.read()
> +            self.readable_pressure_files = True
> +        except:
> +            if self.rq.max_cpu_pressure!=100 or self.rq.max_io_pressure!=100:

See below, but this can just become "if self.rq.max_cpu_pressure or self.rq.max_io_pressure:".

Also, the correct whitespace here is "pressure != 100", which is
standard Python code style. We're not perfect in bitbake, but we do try
to keep to that guideline.

> +                bb.warn("The /proc/pressure files can't be read. Continuing build without monitoring pressure")
> +            self.readable_pressure_files = False
> +
> +    def exceeds_max_pressure(self):
> +        if self.readable_pressure_files and self.rq.max_cpu_pressure<100 and self.rq.max_io_pressure<100:

Again, see below: we can drop the <100 (and there is again a whitespace
issue).

What happens if I only set one of the values though?

> +            # extract avg10 from /proc/pressure/{cpu|io}
> +            with open("/proc/pressure/cpu") as cpu_pressure_fds, open("/proc/pressure/io") as io_pressure_fds:
> +                curr_cpu_pressure = cpu_pressure_fds.readline().split()[1].split("=")[1]
> +                curr_io_pressure = io_pressure_fds.readline().split()[1].split("=")[1]
> +            return float(curr_cpu_pressure) > self.rq.max_cpu_pressure or float(curr_io_pressure) > self.rq.max_io_pressure
> +        return False
>  
>      def next_buildable_task(self):
>          """
> @@ -171,6 +191,8 @@ class RunQueueScheduler(object):
>          buildable.intersection_update(self.rq.tasks_covered | self.rq.tasks_notcovered)
>          if not buildable:
>              return None
> +        if self.exceeds_max_pressure():
> +            return None
>  
>          # Filter out tasks that have a max number of threads that have been exceeded
>          skip_buildable = {}
> @@ -1699,6 +1721,8 @@ class RunQueueExecute:
>  
>          self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS") or 1)
>          self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed"
> +        self.max_cpu_pressure = float(self.cfgData.getVar("BB_PRESSURE_MAX_SOME_CPU") or 100.0)
> +        self.max_io_pressure = float(self.cfgData.getVar("BB_PRESSURE_MAX_SOME_IO") or 100.0)

Instead of "or 100.0", just do "or None", as that is the Pythonic way,
and then other code can be simpler, with no magic values.

Cheers,

Richard

Patch

diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
index 1e47fe70ef..0500211c8d 100644
--- a/bitbake/lib/bb/runqueue.py
+++ b/bitbake/lib/bb/runqueue.py
@@ -159,6 +159,26 @@  class RunQueueScheduler(object):
                 self.buildable.append(tid)
 
         self.rev_prio_map = None
+        # Some hosts like openSUSE have readable /proc/pressure files
+        # but throw errors when these files are opened.
+        try:
+            with open("/proc/pressure/cpu") as cpu_pressure_fds, open("/proc/pressure/io") as io_pressure_fds:
+                cpu_pressure_fds.read()
+                io_pressure_fds.read()
+            self.readable_pressure_files = True
+        except:
+            if self.rq.max_cpu_pressure!=100 or self.rq.max_io_pressure!=100:
+                bb.warn("The /proc/pressure files can't be read. Continuing build without monitoring pressure")
+            self.readable_pressure_files = False
+
+    def exceeds_max_pressure(self):
+        if self.readable_pressure_files and self.rq.max_cpu_pressure<100 and self.rq.max_io_pressure<100:
+            # extract avg10 from /proc/pressure/{cpu|io}
+            with open("/proc/pressure/cpu") as cpu_pressure_fds, open("/proc/pressure/io") as io_pressure_fds:
+                curr_cpu_pressure = cpu_pressure_fds.readline().split()[1].split("=")[1]
+                curr_io_pressure = io_pressure_fds.readline().split()[1].split("=")[1]
+            return float(curr_cpu_pressure) > self.rq.max_cpu_pressure or float(curr_io_pressure) > self.rq.max_io_pressure
+        return False
 
     def next_buildable_task(self):
         """
@@ -171,6 +191,8 @@  class RunQueueScheduler(object):
         buildable.intersection_update(self.rq.tasks_covered | self.rq.tasks_notcovered)
         if not buildable:
             return None
+        if self.exceeds_max_pressure():
+            return None
 
         # Filter out tasks that have a max number of threads that have been exceeded
         skip_buildable = {}
@@ -1699,6 +1721,8 @@  class RunQueueExecute:
 
         self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS") or 1)
         self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed"
+        self.max_cpu_pressure = float(self.cfgData.getVar("BB_PRESSURE_MAX_SOME_CPU") or 100.0)
+        self.max_io_pressure = float(self.cfgData.getVar("BB_PRESSURE_MAX_SOME_IO") or 100.0)
 
         self.sq_buildable = set()
         self.sq_running = set()
@@ -1733,6 +1757,20 @@  class RunQueueExecute:
         if self.number_tasks <= 0:
              bb.fatal("Invalid BB_NUMBER_THREADS %s" % self.number_tasks)
 
+        lower_limit = 1.0
+        upper_limit = 100.0
+        if self.max_cpu_pressure < lower_limit:
+            bb.fatal("Invalid BB_PRESSURE_MAX_SOME_CPU %s, minimum value is %s" % (self.max_cpu_pressure, lower_limit))
+        if self.max_cpu_pressure > upper_limit:
+            bb.warn("Percentage value of BB_PRESSURE_MAX_SOME_CPU %s rounded down to %s" % (self.max_cpu_pressure, upper_limit))
+            self.max_cpu_pressure = upper_limit
+
+        if self.max_io_pressure < lower_limit:
+            bb.fatal("Invalid BB_PRESSURE_MAX_SOME_IO %s, minimum value is %s" % (self.max_io_pressure, lower_limit))
+        if self.max_io_pressure > upper_limit:
+            bb.warn("Percentage value of BB_PRESSURE_MAX_SOME_IO %s rounded down to %s" % (self.max_io_pressure, upper_limit))
+            self.max_io_pressure = upper_limit
+
         # List of setscene tasks which we've covered
         self.scenequeue_covered = set()
         # List of tasks which are covered (including setscene ones)