Message ID | 20230407030715.4394-2-Qi.Chen@windriver.com |
---|---|
State | New |
Headers | show |
Series | [bitbake-devel,1/2] runqueue: fix PSI check calculation | expand |
On 2023-04-06 23:07, Chen Qi via lists.openembedded.org wrote: > According to kernel PSI doc[1], for /proc/pressure/* interface files, > the first line is the 'some' line and the second line is the 'full' > line. > > Quoting from the doc: > """ > The "some" line indicates the share of time in which at least some tasks > are stalled on a given resource. > > The "full" line indicates the share of time in which all non-idle tasks > are stalled on a given resource simultaneously. In this state actual CPU > cycles are going to waste, and a workload that spends extended time in this > state is considered to be thrashing. This has severe impact on performance, > and it’s useful to distinguish this situation from a state where some tasks > are stalled but the CPU is still doing productive work. > """ > > We can see that the 'full' line is a better measurement to check if things are > slowed down as a whole. > > Also, the /proc/pressure/cpu's 'full' line may not be available on some systems, > so fall back to the 'some' line. > > [1]https://www.kernel.org/doc/html/latest/accounting/psi.html NACK... I'd need to dig up or reproduce some test cases, but I believe that we should NOT use 'full' instead of 'some'. We want to have a *lower* threshold to defer work when the system resources are experiencing pressure. We don't want to wait for the system to get to a state where all of the resources are under pressure. There may be use cases involving containers where the 'full' PSI info is more useful or even essential, but we would need a motivating example before making that change. 
../Randy > > Signed-off-by: Chen Qi<Qi.Chen@windriver.com> > --- > bitbake/lib/bb/runqueue.py | 23 +++++++++++++++-------- > 1 file changed, 15 insertions(+), 8 deletions(-) > > diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py > index 02f1474540..e3198dcdeb 100644 > --- a/bitbake/lib/bb/runqueue.py > +++ b/bitbake/lib/bb/runqueue.py > @@ -173,10 +173,13 @@ class RunQueueScheduler(object): > with open("/proc/pressure/cpu") as cpu_pressure_fds, \ > open("/proc/pressure/io") as io_pressure_fds, \ > open("/proc/pressure/memory") as memory_pressure_fds: > - > - self.prev_cpu_pressure = cpu_pressure_fds.readline().split()[4].split("=")[1] > - self.prev_io_pressure = io_pressure_fds.readline().split()[4].split("=")[1] > - self.prev_memory_pressure = memory_pressure_fds.readline().split()[4].split("=")[1] > + cpu_lines = cpu_pressure_fds.readlines() > + if len(cpu_lines) == 1: > + self.prev_cpu_pressure = cpu_lines[0].split()[4].split("=")[1] > + else: > + self.prev_cpu_pressure = cpu_lines[1].split()[4].split("=")[1] > + self.prev_io_pressure = io_pressure_fds.readlines()[1].split()[4].split("=")[1] > + self.prev_memory_pressure = memory_pressure_fds.readlines()[1].split()[4].split("=")[1] > self.prev_pressure_time = time.time() > self.check_pressure = True > except: > @@ -194,10 +197,14 @@ class RunQueueScheduler(object): > with open("/proc/pressure/cpu") as cpu_pressure_fds, \ > open("/proc/pressure/io") as io_pressure_fds, \ > open("/proc/pressure/memory") as memory_pressure_fds: > - # extract "total" from /proc/pressure/{cpu|io} > - curr_cpu_pressure = cpu_pressure_fds.readline().split()[4].split("=")[1] > - curr_io_pressure = io_pressure_fds.readline().split()[4].split("=")[1] > - curr_memory_pressure = memory_pressure_fds.readline().split()[4].split("=")[1] > + # extract "total" from /proc/pressure/{cpu|io|memory} > + cpu_lines = cpu_pressure_fds.readlines() > + if len(cpu_lines) == 1: > + curr_cpu_pressure = cpu_lines[0].split()[4].split("=")[1] 
> + else: > + curr_cpu_pressure = cpu_lines[1].split()[4].split("=")[1] > + curr_io_pressure = io_pressure_fds.readlines()[1].split()[4].split("=")[1] > + curr_memory_pressure = memory_pressure_fds.readlines()[1].split()[4].split("=")[1] > now = time.time() > tdiff = now - self.prev_pressure_time > if tdiff > 1.0: > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this group. > View/Reply Online (#14683):https://lists.openembedded.org/g/bitbake-devel/message/14683 > Mute This Topic:https://lists.openembedded.org/mt/98118923/3616765 > Group Owner:bitbake-devel+owner@lists.openembedded.org > Unsubscribe:https://lists.openembedded.org/g/bitbake-devel/unsub [randy.macleod@windriver.com] > -=-=-=-=-=-=-=-=-=-=-=- >
diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py index 02f1474540..e3198dcdeb 100644 --- a/bitbake/lib/bb/runqueue.py +++ b/bitbake/lib/bb/runqueue.py @@ -173,10 +173,13 @@ class RunQueueScheduler(object): with open("/proc/pressure/cpu") as cpu_pressure_fds, \ open("/proc/pressure/io") as io_pressure_fds, \ open("/proc/pressure/memory") as memory_pressure_fds: - - self.prev_cpu_pressure = cpu_pressure_fds.readline().split()[4].split("=")[1] - self.prev_io_pressure = io_pressure_fds.readline().split()[4].split("=")[1] - self.prev_memory_pressure = memory_pressure_fds.readline().split()[4].split("=")[1] + cpu_lines = cpu_pressure_fds.readlines() + if len(cpu_lines) == 1: + self.prev_cpu_pressure = cpu_lines[0].split()[4].split("=")[1] + else: + self.prev_cpu_pressure = cpu_lines[1].split()[4].split("=")[1] + self.prev_io_pressure = io_pressure_fds.readlines()[1].split()[4].split("=")[1] + self.prev_memory_pressure = memory_pressure_fds.readlines()[1].split()[4].split("=")[1] self.prev_pressure_time = time.time() self.check_pressure = True except: @@ -194,10 +197,14 @@ class RunQueueScheduler(object): with open("/proc/pressure/cpu") as cpu_pressure_fds, \ open("/proc/pressure/io") as io_pressure_fds, \ open("/proc/pressure/memory") as memory_pressure_fds: - # extract "total" from /proc/pressure/{cpu|io} - curr_cpu_pressure = cpu_pressure_fds.readline().split()[4].split("=")[1] - curr_io_pressure = io_pressure_fds.readline().split()[4].split("=")[1] - curr_memory_pressure = memory_pressure_fds.readline().split()[4].split("=")[1] + # extract "total" from /proc/pressure/{cpu|io|memory} + cpu_lines = cpu_pressure_fds.readlines() + if len(cpu_lines) == 1: + curr_cpu_pressure = cpu_lines[0].split()[4].split("=")[1] + else: + curr_cpu_pressure = cpu_lines[1].split()[4].split("=")[1] + curr_io_pressure = io_pressure_fds.readlines()[1].split()[4].split("=")[1] + curr_memory_pressure = memory_pressure_fds.readlines()[1].split()[4].split("=")[1] now = 
time.time() tdiff = now - self.prev_pressure_time if tdiff > 1.0:
According to the kernel PSI doc [1], for /proc/pressure/* interface files, the first line is the 'some' line and the second line is the 'full' line. Quoting from the doc: """ The "some" line indicates the share of time in which at least some tasks are stalled on a given resource. The "full" line indicates the share of time in which all non-idle tasks are stalled on a given resource simultaneously. In this state actual CPU cycles are going to waste, and a workload that spends extended time in this state is considered to be thrashing. This has severe impact on performance, and it’s useful to distinguish this situation from a state where some tasks are stalled but the CPU is still doing productive work. """ We can see that the 'full' line is a better measurement to check if things are slowed down as a whole. Also, the 'full' line of /proc/pressure/cpu may not be available on some systems, so fall back to the 'some' line for CPU pressure when it is missing. [1] https://www.kernel.org/doc/html/latest/accounting/psi.html Signed-off-by: Chen Qi <Qi.Chen@windriver.com> --- bitbake/lib/bb/runqueue.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-)