Skip to content

diagnostics: fix various issues #11295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 21, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ is required to enable Fluentd to look up pod metadata for the logs it gathers.
As a user with a cluster-admin role, you can grant the permissions by running
the following:

oadm policy add-cluster-role-to-user cluster-reader system:serviceaccount:%[2]s:%[1]s
$ oadm policy add-cluster-role-to-user cluster-reader system:serviceaccount:%[2]s:%[1]s
`

func checkClusterRoleBindings(r diagnosticReporter, adapter clusterRoleBindingsAdapter, project string) {
Expand Down
12 changes: 6 additions & 6 deletions pkg/diagnostics/cluster/aggregated_logging/daemonsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@ There are no nodes that match the selector for DaemonSet '%[1]s'. This
means Fluentd is not running and is not gathering logs from any nodes.
An example of a command to target a specific node for this DaemonSet:

oc label node/node1.example.com %[2]s
$ oc label node/node1.example.com %[2]s

or to label them all:

oc label node --all %[2]s
$ oc label node --all %[2]s
`

const daemonSetPartialNodesLabeled = `
There are some nodes that match the selector for DaemonSet '%s'.
A list of matching nodes can be discovered by running:

oc get nodes -l %s
$ oc get nodes -l %s
`
const daemonSetNoPodsFound = `
There were no pods found that match DaemonSet '%s' with matchLabels '%s'
Expand All @@ -36,9 +36,9 @@ Depending upon the state, this could mean there is an error running the image
for one or more pod containers, the node could be pulling images, etc. Try running
the following commands to get additional information:

oc describe pod %[1]s -n %[5]s
oc logs %[1]s -n %[5]s
oc get events -n %[5]s
$ oc describe pod %[1]s -n %[5]s
$ oc logs %[1]s -n %[5]s
$ oc get events -n %[5]s
`
const daemonSetNotFound = `
There were no DaemonSets in project '%s' that included label '%s'. This implies
Expand Down
14 changes: 7 additions & 7 deletions pkg/diagnostics/cluster/aggregated_logging/deploymentconfigs.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ const deploymentConfigZeroPodsFound = `
There were no Pods found that support logging. Try running
the following commands for additional information:

oc describe dc -n %[1]s
oc get events -n %[1]s
$ oc describe dc -n %[1]s
$ oc get events -n %[1]s
`
const deploymentConfigNoPodsFound = `
There were no Pods found for DeploymentConfig '%[1]s'. Try running
the following commands for additional information:

oc describe dc %[1]s -n %[2]s
oc get events -n %[2]s
$ oc describe dc %[1]s -n %[2]s
$ oc get events -n %[2]s
`
const deploymentConfigPodsNotRunning = `
The Pod '%[1]s' matched by DeploymentConfig '%[2]s' is not in '%[3]s' status: %[4]s.
Expand All @@ -50,9 +50,9 @@ Depending upon the state, this could mean there is an error running the image
for one or more pod containers, the node could be pulling images, etc. Try running
the following commands for additional information:

oc describe pod %[1]s -n %[5]s
oc logs %[1]s -n %[5]s
oc get events -n %[5]s
$ oc describe pod %[1]s -n %[5]s
$ oc logs %[1]s -n %[5]s
$ oc get events -n %[5]s
`

func checkDeploymentConfigs(r diagnosticReporter, adapter deploymentConfigAdapter, project string) {
Expand Down
7 changes: 5 additions & 2 deletions pkg/diagnostics/cluster/aggregated_logging/diagnostic.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,10 @@ func (d *AggregatedLogging) CanRun() (bool, error) {
var err error
d.masterConfig, err = hostdiag.GetMasterConfig(d.result, d.MasterConfigFile)
if err != nil {
return false, errors.New("Unreadable master config; skipping this diagnostic.")
return false, errors.New("Master configuration is unreadable")
}
if d.masterConfig.AssetConfig.LoggingPublicURL == "" {
return false, errors.New("No LoggingPublicURL is defined in the master configuration")
}
return true, nil
}
Expand All @@ -150,7 +153,7 @@ The project '%[1]s' was found with either a missing or non-empty node selector a
This could keep Fluentd from running on certain nodes and collecting logs from the entire cluster.
You can correct it by editing the project:

oc edit namespace %[1]s
$ oc edit namespace %[1]s

and updating the annotation:

Expand Down
2 changes: 1 addition & 1 deletion pkg/diagnostics/cluster/aggregated_logging/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ An unaccepted route is most likely due to one of the following reasons:
If a router has been deployed, look for duplicate matching routes by
running the following:

oc get --all-namespaces routes --template='{{range .items}}{{if eq .spec.host "%[2]s"}}{{println .metadata.name "in" .metadata.namespace}}{{end}}{{end}}'
$ oc get --all-namespaces routes --template='{{range .items}}{{if eq .spec.host "%[2]s"}}{{println .metadata.name "in" .metadata.namespace}}{{end}}{{end}}'

`
const routeCertMissingHostName = `
Expand Down
2 changes: 1 addition & 1 deletion pkg/diagnostics/cluster/aggregated_logging/scc.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ The ServiceAccount '%[1]s' does not have a privileged SecurityContextConstraint
user with a cluster-admin role, you can grant the permissions by running
the following:

oadm policy add-scc-to-user privileged system:serviceaccount:%[2]s:%[1]s
$ oadm policy add-scc-to-user privileged system:serviceaccount:%[2]s:%[1]s
`

func checkSccs(r diagnosticReporter, adapter sccAdapter, project string) {
Expand Down
35 changes: 31 additions & 4 deletions pkg/diagnostics/cluster/roles.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,34 @@ type ClusterRoles struct {
}

const (
ClusterRolesName = "ClusterRoles"
ClusterRolesName = "ClusterRoles"
clusterRoleMissing = `
clusterrole/%s is missing.

Use the 'oadm policy reconcile-cluster-roles' command to create the role. For example,

$ oadm policy reconcile-cluster-roles \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For consistency we should probably update all places to include '$' in the other messages found in AGL diagnostics

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CLI Hacking Guide says no $:
https://github.com/openshift/origin/blob/master/docs/cli_hacking_guide.adoc#writing-examples

And that's what most if not all commands follow.

Wouldn't it be the case for oc diagnostics too?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see where it explicitly says to not use dollar signs but I think this example is subtly different. Here we are suggesting a command you can run to resolve a discovered issue. We are not suggesting an example of running a diagnostics command. Regardless, I suggest we either add '$' or remove them to be consistent with places we have used them before.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We removed all $ prefixes back in May to follow the style in Kubernetes: #8454.

I'm not sure if it's the case here, but for the CLI commands those comments become part of the generated docs, so I agree with you that we need to be consistent.

Copy link
Contributor

@rhcarvalho rhcarvalho Nov 7, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if it's the case here,

Most probably not the case... these seem to be just constants that I believe are printed as output of the diagnostics.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could argue that bit of the CLI hacking guide is only about the command usage statement. I think diagnostics is the only place that has output related to running other commands entirely. It's a gray area; I think it is clearer with the $ to indicate commands; what do you think?

--additive-only=true --confirm
`
clusterRoleReduced = `
clusterrole/%s has changed, but the existing role has more permissions than the new role.

If you can confirm that the extra permissions are not required, you may use the
'oadm policy reconcile-cluster-roles' command to update the role to reduce permissions.
For example,

$ oadm policy reconcile-cluster-roles \
--additive-only=false --confirm
`
clusterRoleChanged = `
clusterrole/%s has changed and the existing role does not have enough permissions.

Use the 'oadm policy reconcile-cluster-roles' command to update the role.
For example,

$ oadm policy reconcile-cluster-roles \
--additive-only=true --confirm
`
)

func (d *ClusterRoles) Name() string {
Expand Down Expand Up @@ -70,7 +97,7 @@ func (d *ClusterRoles) Check() types.DiagnosticResult {
for _, changedClusterRole := range changedClusterRoles {
actualClusterRole, err := d.ClusterRolesClient.ClusterRoles().Get(changedClusterRole.Name)
if kerrs.IsNotFound(err) {
r.Error("CRD1002", nil, fmt.Sprintf("clusterrole/%s is missing.\n\nUse the `oadm policy reconcile-cluster-roles` command to create the role.", changedClusterRole.Name))
r.Error("CRD1002", nil, fmt.Sprintf(clusterRoleMissing, changedClusterRole.Name))
continue
}
if err != nil {
Expand All @@ -79,15 +106,15 @@ func (d *ClusterRoles) Check() types.DiagnosticResult {

_, missingRules := rulevalidation.Covers(actualClusterRole.Rules, changedClusterRole.Rules)
if len(missingRules) == 0 {
r.Warn("CRD1003", nil, fmt.Sprintf("clusterrole/%s has changed, but the existing role has more permissions than the new role.\n\nUse the `oadm policy reconcile-cluster-roles` command to update the role to reduce permissions.", changedClusterRole.Name))
r.Info("CRD1003", fmt.Sprintf(clusterRoleReduced, changedClusterRole.Name))
_, extraRules := rulevalidation.Covers(changedClusterRole.Rules, actualClusterRole.Rules)
for _, extraRule := range extraRules {
r.Info("CRD1008", fmt.Sprintf("clusterrole/%s has extra permission %v.", changedClusterRole.Name, extraRule))
}
continue
}

r.Error("CRD1005", nil, fmt.Sprintf("clusterrole/%s has changed and the existing role does not have enough permissions.\n\nUse the `oadm policy reconcile-cluster-roles` command to update the role.", changedClusterRole.Name))
r.Error("CRD1005", nil, fmt.Sprintf(clusterRoleChanged, changedClusterRole.Name))
for _, missingRule := range missingRules {
r.Info("CRD1007", fmt.Sprintf("clusterrole/%s is missing permission %v.", changedClusterRole.Name, missingRule))
}
Expand Down